Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-07-12 12:45:56 +00:00)
shader_recompiler: Optimize general case of buffer addressing (#3159)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
* shader_recompiler: Simplify dma types. Only U32 is needed for S_LOAD_DWORD.
* shader_recompiler: Perform address shift on IR level. Buffer instructions now expect the address in the data unit they work on; doing the shift at the IR level lets some operations be optimized away in the common case.
* shader_recompiler: Optimize common buffer access pattern.
* emit_spirv: Use 32-bit integer ops for the fault buffer. Few GPUs have 8-bit bitwise OR operations, so the driver would probably need some overhead to emulate them.
* resource_tracking_pass: Fix texel buffer shift.
parent 6eaec7a004
commit a49b13fe66
12 changed files with 271 additions and 233 deletions
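Before the diff, a quick sketch of the central idea (illustrative values and names only, not code from this commit): buffer instructions now carry their address in the unit of the data they access, so the divide-by-element-size moves out of the SPIR-V backend and into the IR, where it is emitted once per address calculation and can often be folded away.

```cpp
#include <cstdint>

// Illustrative only: an element-indexed buffer needs the byte address divided
// by the element size. Doing that at the IR level means one shift per address
// calculation instead of one per load/store/atomic in the backend.
uint32_t dword_index(uint32_t byte_address) {
    return byte_address >> 2; // 4-byte elements (U32/F32 buffers)
}

uint32_t qword_index(uint32_t byte_address) {
    return byte_address >> 3; // 8-byte elements (U64 buffers, 64-bit atomics)
}
```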
@@ -300,7 +300,7 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
 if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
 ctx.AddCapability(spv::Capability::Tessellation);
 }
-if (info.dma_types != IR::Type::Void) {
+if (info.uses_dma) {
 ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
 ctx.AddExtension("SPV_KHR_physical_storage_buffer");
 }
@@ -7,7 +7,11 @@
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 
 namespace Shader::Backend::SPIRV {
 
 namespace {
+using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
+
 std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
 const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
 const Id semantics{ctx.u32_zero_value};

@@ -61,14 +65,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
 return ctx.U32[1];
 }
 }();
-if (Sirit::ValidId(buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
 const auto [scope, semantics]{AtomicArgs(ctx)};
-return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
+return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {
 return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
 });
 }

@@ -76,14 +79,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
 Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
 const auto& buffer = ctx.buffers[handle];
-if (Sirit::ValidId(buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
 const auto [scope, semantics]{AtomicArgs(ctx)};
-return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
 return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
 });
 }

@@ -92,14 +94,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id cmp_value,
 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
 const auto& buffer = ctx.buffers[handle];
-if (Sirit::ValidId(buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
 const auto [scope, semantics]{AtomicArgs(ctx)};
-return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
 return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
 });
 }

@@ -107,14 +108,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
 const auto& buffer = ctx.buffers[handle];
-if (Sirit::ValidId(buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+if (const Id offset = buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
-const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+const auto [id, pointer_type] = buffer.Alias(PointerType::U64);
+const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
 const auto [scope, semantics]{AtomicArgs(ctx)};
-return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
+return AccessBoundsCheck<64>(ctx, address, buffer.Size(PointerSize::B64), [&] {
 return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
 });
 }

@@ -360,7 +360,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
 
 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
 const auto& buffer = ctx.buffers[binding];
-const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
 const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
 const auto [scope, semantics]{AtomicArgs(ctx)};
 return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);

@@ -368,7 +368,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
 
 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
 const auto& buffer = ctx.buffers[binding];
-const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
 const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
 const auto [scope, semantics]{AtomicArgs(ctx)};
 return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
@@ -3,6 +3,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 #include "shader_recompiler/ir/attribute.h"

@@ -11,8 +12,6 @@
 #include <magic_enum/magic_enum.hpp>
 
-#include "emit_spirv_bounds.h"
-
 namespace Shader::Backend::SPIRV {
 namespace {

@@ -164,6 +163,7 @@ void EmitGetGotoVariable(EmitContext&) {
 }
 
 using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
 const u32 flatbuf_off_dw = inst->Flags<u32>();

@@ -179,14 +179,15 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
 template <PointerType type>
 Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
 const auto& buffer = ctx.buffers[handle];
-index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-const auto [id, pointer_type] = buffer[type];
+if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+index = ctx.OpIAdd(ctx.U32[1], index, offset);
+}
+const auto [id, pointer_type] = buffer.Alias(type);
 const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
 const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
 const Id result{ctx.OpLoad(value_type, ptr)};
 
-if (Sirit::ValidId(buffer.size_dwords)) {
-const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
+if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
+const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
 return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
 }
 return result;

@@ -419,25 +420,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
 
 template <u32 N, PointerType alias>
 static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+constexpr bool is_float = alias == PointerType::F32;
 const auto flags = inst->Flags<IR::BufferInstInfo>();
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
 const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
-const auto [id, pointer_type] = spv_buffer[alias];
+const auto [id, pointer_type] = spv_buffer.Alias(alias);
 
 boost::container::static_vector<Id, N> ids;
 for (u32 i = 0; i < N; i++) {
-const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
 const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
 const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
 if (!flags.typed) {
 // Untyped loads have bounds checking per-component.
-ids.push_back(LoadAccessBoundsCheck < 32, 1,
-alias ==
-PointerType::F32 > (ctx, index_i, spv_buffer.size_dwords, result_i));
+ids.push_back(LoadAccessBoundsCheck<32, 1, is_float>(
+ctx, index_i, spv_buffer.Size(PointerSize::B32), result_i));
 } else {
 ids.push_back(result_i);
 }

@@ -446,33 +446,32 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
 const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
 if (flags.typed) {
 // Typed loads have single bounds check for the whole load.
-return LoadAccessBoundsCheck < 32, N,
-alias == PointerType::F32 > (ctx, index, spv_buffer.size_dwords, result);
+return LoadAccessBoundsCheck<32, N, is_float>(ctx, address,
+spv_buffer.Size(PointerSize::B32), result);
 }
 return result;
 }
 
 Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const auto [id, pointer_type] = spv_buffer[PointerType::U8];
+const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
 const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
 const Id result{ctx.OpLoad(ctx.U8, ptr)};
-return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.size, result);
+return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), result);
 }
 
 Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const auto [id, pointer_type] = spv_buffer[PointerType::U16];
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
+const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
 const Id result{ctx.OpLoad(ctx.U16, ptr)};
-return LoadAccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, result);
+return LoadAccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), result);
 }
 
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
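The comments in the loads above distinguish per-component bounds checks for untyped B32xN accesses from a single check for typed ones. A rough host-side analogy in plain C++ (illustrative only; the emitter actually generates SPIR-V select/branch logic, and `kSizeDwords`/`buffer` are made-up names):

```cpp
#include <array>
#include <cstdint>

constexpr uint32_t kSizeDwords = 6;
constexpr std::array<uint32_t, kSizeDwords> buffer{1, 2, 3, 4, 5, 6};

// Untyped access: each dword is clamped individually.
uint32_t load_untyped_component(uint32_t index) {
    return index < kSizeDwords ? buffer[index] : 0u; // checked per component
}

// Typed access: one check on the starting index decides the whole load.
template <uint32_t N>
std::array<uint32_t, N> load_typed(uint32_t first) {
    std::array<uint32_t, N> out{};
    if (first >= kSizeDwords) { // single bounds check for the whole access
        return out;
    }
    for (uint32_t i = 0; i < N && first + i < kSizeDwords; ++i) {
        out[i] = buffer[first + i]; // inner guard only keeps the host example memory-safe
    }
    return out;
}
```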
@@ -493,14 +492,13 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
 
 Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const auto [id, pointer_type] = spv_buffer[PointerType::U64];
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
+const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
+const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
 const Id result{ctx.OpLoad(ctx.U64, ptr)};
-return LoadAccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, result);
+return LoadAccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), result);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {

@@ -526,18 +524,18 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
 template <u32 N, PointerType alias>
 static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
 Id value) {
+constexpr bool is_float = alias == PointerType::F32;
 const auto flags = inst->Flags<IR::BufferInstInfo>();
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
 const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
-const auto [id, pointer_type] = spv_buffer[alias];
+const auto [id, pointer_type] = spv_buffer.Alias(alias);
 
 auto store = [&] {
 for (u32 i = 0; i < N; i++) {
-const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
 const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
 const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
 auto store_i = [&] {

@@ -546,8 +544,8 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
 };
 if (!flags.typed) {
 // Untyped stores have bounds checking per-component.
-AccessBoundsCheck<32, 1, alias == PointerType::F32>(
-ctx, index_i, spv_buffer.size_dwords, store_i);
+AccessBoundsCheck<32, 1, is_float>(ctx, index_i, spv_buffer.Size(PointerSize::B32),
+store_i);
 } else {
 store_i();
 }

@@ -557,8 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
 
 if (flags.typed) {
 // Typed stores have single bounds check for the whole store.
-AccessBoundsCheck<32, N, alias == PointerType::F32>(ctx, index, spv_buffer.size_dwords,
-store);
+AccessBoundsCheck<32, N, is_float>(ctx, address, spv_buffer.Size(PointerSize::B32), store);
 } else {
 store();
 }

@@ -566,12 +563,12 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
 
 void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const auto [id, pointer_type] = spv_buffer[PointerType::U8];
+const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
 const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-AccessBoundsCheck<8>(ctx, address, spv_buffer.size, [&] {
+AccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), [&] {
 ctx.OpStore(ptr, value);
 return Id{};
 });

@@ -579,13 +576,12 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
 
 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const auto [id, pointer_type] = spv_buffer[PointerType::U16];
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-AccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, [&] {
+const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
+const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+AccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), [&] {
 ctx.OpStore(ptr, value);
 return Id{};
 });

@@ -609,13 +605,12 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 
 void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
 const auto& spv_buffer = ctx.buffers[handle];
-if (Sirit::ValidId(spv_buffer.offset)) {
-address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+address = ctx.OpIAdd(ctx.U32[1], address, offset);
 }
-const auto [id, pointer_type] = spv_buffer[PointerType::U64];
-const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
-AccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, [&] {
+const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
+const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
+AccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), [&] {
 ctx.OpStore(ptr, value);
 return Id{};
 });
@@ -71,7 +71,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
 Bindings& binding_)
 : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
 profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
-if (info.dma_types != IR::Type::Void) {
+if (info.uses_dma) {
 SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
 } else {
 SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);

@@ -169,34 +169,8 @@ void EmitContext::DefineArithmeticTypes() {
 if (info.uses_fp64) {
 frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
 }
 
-if (True(info.dma_types & IR::Type::F64)) {
-physical_pointer_types[PointerType::F64] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
-}
-if (True(info.dma_types & IR::Type::U64)) {
-physical_pointer_types[PointerType::U64] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
-}
-if (True(info.dma_types & IR::Type::F32)) {
-physical_pointer_types[PointerType::F32] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
-}
-if (True(info.dma_types & IR::Type::U32)) {
-physical_pointer_types[PointerType::U32] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
-}
-if (True(info.dma_types & IR::Type::F16)) {
-physical_pointer_types[PointerType::F16] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
-}
-if (True(info.dma_types & IR::Type::U16)) {
-physical_pointer_types[PointerType::U16] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
-}
-if (True(info.dma_types & IR::Type::U8)) {
-physical_pointer_types[PointerType::U8] =
-TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
+if (info.uses_dma) {
+physical_pointer_type_u32 = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
 }
 }

@@ -239,7 +213,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
 // Can this be done with memory access? Like we do now with ReadConst
 const auto& srt_flatbuf = buffers[flatbuf_index];
 ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
-const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
+const auto [id, pointer_type] = srt_flatbuf.Alias(PointerType::U32);
 
 const auto rsrc1{
 OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};

@@ -255,39 +229,70 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
 }
 
 void EmitContext::DefineBufferProperties() {
+if (!profile.needs_buffer_offsets && profile.supports_robust_buffer_access) {
+return;
+}
 for (u32 i = 0; i < buffers.size(); i++) {
-BufferDefinition& buffer = buffers[i];
+auto& buffer = buffers[i];
+const auto& desc = info.buffers[i];
-const u32 binding = buffer.binding;
 if (buffer.buffer_type != BufferType::Guest) {
 continue;
 }
+const u32 binding = buffer.binding;
-const u32 half = PushData::BufOffsetIndex + (binding >> 4);
-const u32 comp = (binding & 0xf) >> 2;
-const u32 offset = (binding & 0x3) << 3;
-const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
-push_data_block, ConstU32(half), ConstU32(comp))};
-const Id value{OpLoad(U32[1], ptr)};
-buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
-Name(buffer.offset, fmt::format("buf{}_off", binding));
-buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
-Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
-
-// Only need to load size if performing bounds checks and the buffer is both guest and not
-// inline.
-if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
-const BufferResource& desc = info.buffers[i];
-if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
-buffer.size = ConstU32(desc.inline_cbuf.GetSize());
-} else {
-buffer.size = GetBufferSize(desc.sharp_idx);
+// Only load and apply buffer offsets if host GPU alignment is larger than guest.
+if (profile.needs_buffer_offsets) {
+const u32 half = PushData::BufOffsetIndex + (binding >> 4);
+const u32 comp = (binding & 0xf) >> 2;
+const u32 offset = (binding & 0x3) << 3;
+const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
+push_data_block, ConstU32(half), ConstU32(comp))};
+const Id value{OpLoad(U32[1], ptr)};
+
+const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
+Name(buf_offset, fmt::format("buf{}_off", binding));
+buffer.Offset(PointerSize::B8) = buf_offset;
+
+if (True(desc.used_types & IR::Type::U16)) {
+const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
+Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
+buffer.Offset(PointerSize::B16) = buf_word_offset;
+}
+if (True(desc.used_types & IR::Type::U32)) {
+const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
+Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
+buffer.Offset(PointerSize::B32) = buf_dword_offset;
+}
+if (True(desc.used_types & IR::Type::U64)) {
+const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
+Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
+buffer.Offset(PointerSize::B64) = buf_qword_offset;
+}
+}
+
+// Only load size if performing bounds checks.
+if (!profile.supports_robust_buffer_access) {
+const Id buf_size{desc.sharp_idx == std::numeric_limits<u32>::max()
+? ConstU32(desc.inline_cbuf.GetSize())
+: GetBufferSize(desc.sharp_idx)};
+Name(buf_size, fmt::format("buf{}_size", binding));
+buffer.Size(PointerSize::B8) = buf_size;
+
+if (True(desc.used_types & IR::Type::U16)) {
+const Id buf_word_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(1U))};
+Name(buf_word_size, fmt::format("buf{}_short_size", binding));
+buffer.Size(PointerSize::B16) = buf_word_size;
+}
+if (True(desc.used_types & IR::Type::U32)) {
+const Id buf_dword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(2U))};
+Name(buf_dword_size, fmt::format("buf{}_dword_size", binding));
+buffer.Size(PointerSize::B32) = buf_dword_size;
+}
+if (True(desc.used_types & IR::Type::U64)) {
+const Id buf_qword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(3U))};
+Name(buf_qword_size, fmt::format("buf{}_qword_size", binding));
+buffer.Size(PointerSize::B64) = buf_qword_size;
+}
-Name(buffer.size, fmt::format("buf{}_size", binding));
-buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
-Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
-buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
-Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
-buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
-Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
 }
 }
 }
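DefineBufferProperties above now records the push-constant offset and the buffer size once per addressing unit the shader actually uses. A minimal sketch of that layout, assuming plain integers instead of the SPIR-V Ids the real BufferDefinition stores, and ignoring the used_types guards:

```cpp
#include <array>
#include <cstdint>

enum class PointerSize : uint32_t { B8, B16, B32, B64, NumClass };

// Sketch only: precompute the offset in every unit so later accesses can index
// the matching typed alias directly, without re-shifting at each load/store.
struct BufferProps {
    std::array<uint32_t, static_cast<size_t>(PointerSize::NumClass)> offsets{};

    void Define(uint32_t byte_offset) {
        offsets[0] = byte_offset;      // B8:  bytes
        offsets[1] = byte_offset >> 1; // B16: 16-bit words
        offsets[2] = byte_offset >> 2; // B32: dwords
        offsets[3] = byte_offset >> 3; // B64: qwords
    }

    uint32_t Offset(PointerSize size) const {
        return offsets[static_cast<size_t>(size)];
    }
};
```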
@@ -779,8 +784,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };
 
 void EmitContext::DefineBuffers() {
-if (!profile.supports_robust_buffer_access &&
-info.readconst_types == Info::ReadConstType::None) {
+if (!profile.supports_robust_buffer_access && !info.uses_dma) {
 // In case Flatbuf has not already been bound by IR and is needed
 // to query buffer sizes, bind it now.
 info.buffers.push_back({

@@ -809,23 +813,23 @@ void EmitContext::DefineBuffers() {
 // Define aliases depending on the shader usage.
 auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
 if (True(desc.used_types & IR::Type::U64)) {
-spv_buffer[PointerType::U64] =
+spv_buffer.Alias(PointerType::U64) =
 DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
 }
 if (True(desc.used_types & IR::Type::U32)) {
-spv_buffer[PointerType::U32] =
+spv_buffer.Alias(PointerType::U32) =
 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
 }
 if (True(desc.used_types & IR::Type::F32)) {
-spv_buffer[PointerType::F32] =
+spv_buffer.Alias(PointerType::F32) =
 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
 }
 if (True(desc.used_types & IR::Type::U16)) {
-spv_buffer[PointerType::U16] =
+spv_buffer.Alias(PointerType::U16) =
 DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
 }
 if (True(desc.used_types & IR::Type::U8)) {
-spv_buffer[PointerType::U8] =
+spv_buffer.Alias(PointerType::U8) =
 DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
 }
 ++binding.unified;

@@ -1154,7 +1158,7 @@ Id EmitContext::DefineGetBdaPointer() {
 const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
 const auto page32{OpUConvert(U32[1], page)};
 const auto& bda_buffer{buffers[bda_pagetable_index]};
-const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
+const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
 const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
 const auto bda{OpLoad(U64, bda_ptr)};

@@ -1166,14 +1170,14 @@ Id EmitContext::DefineGetBdaPointer() {
 // First time acces, mark as fault
 AddLabel(fault_label);
 const auto& fault_buffer{buffers[fault_buffer_index]};
-const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
-const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
-const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
-const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
+const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U32);
+const auto page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
+const auto page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
+const auto page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
 const auto fault_ptr{
-OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)};
-const auto fault_value{OpLoad(U8, fault_ptr)};
-const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)};
+OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div32)};
+const auto fault_value{OpLoad(U32[1], fault_ptr)};
+const auto fault_value_masked{OpBitwiseOr(U32[1], fault_value, page_mask)};
 OpStore(fault_ptr, fault_value_masked);
 
 // Return null pointer
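The fault-marking path above is the "32-bit integer ops for fault buffer" item from the commit message: the page bit is now set inside a 32-bit word rather than a byte, since 8-bit bitwise operations are poorly supported on many GPUs. A host-side sketch of the same arithmetic (illustrative only; the real code emits SPIR-V instructions):

```cpp
#include <cstdint>
#include <vector>

// Word-based marking: only 32-bit shift/and/or are required on the GPU.
void MarkFaultPage32(std::vector<uint32_t>& fault_words, uint32_t page) {
    const uint32_t word = page >> 5; // page / 32
    const uint32_t bit = page & 31u; // page % 32
    fault_words[word] |= 1u << bit;
}

// The previous byte-based form needed an 8-bit OR, which drivers may emulate:
void MarkFaultPage8(std::vector<uint8_t>& fault_bytes, uint32_t page) {
    const uint32_t byte = page >> 3; // page / 8
    const uint32_t bit = page & 7u;  // page % 8
    fault_bytes[byte] |= static_cast<uint8_t>(1u << bit);
}
```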
@@ -1211,14 +1215,15 @@ Id EmitContext::DefineReadConst(bool dynamic) {
 const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
 const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};
 
-const auto result = EmitMemoryRead(U32[1], addr, [&]() {
+const auto result = EmitDwordMemoryRead(addr, [&]() {
 if (dynamic) {
 return u32_zero_value;
 } else {
 const auto& flatbuf_buffer{buffers[flatbuf_index]};
 ASSERT(flatbuf_buffer.binding >= 0 &&
 flatbuf_buffer.buffer_type == BufferType::Flatbuf);
-const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
+const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
+flatbuf_buffer.Alias(PointerType::U32);
 const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
 flatbuf_offset)};
 return OpLoad(U32[1], ptr);

@@ -1239,7 +1244,7 @@ void EmitContext::DefineFunctions() {
 uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
 uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
 }
-if (info.dma_types != IR::Type::Void) {
+if (info.uses_dma) {
 get_bda_pointer = DefineGetBdaPointer();
 }
@@ -42,17 +42,6 @@ public:
 Bindings& binding);
 ~EmitContext();
 
-enum class PointerType : u32 {
-U8,
-U16,
-F16,
-U32,
-F32,
-U64,
-F64,
-NumAlias,
-};
-
 Id Def(const IR::Value& value);
 
 void DefineBufferProperties();

@@ -155,25 +144,7 @@ public:
 return last_label;
 }
 
-PointerType PointerTypeFromType(Id type) {
-if (type.value == U8.value)
-return PointerType::U8;
-if (type.value == U16.value)
-return PointerType::U16;
-if (type.value == F16[1].value)
-return PointerType::F16;
-if (type.value == U32[1].value)
-return PointerType::U32;
-if (type.value == F32[1].value)
-return PointerType::F32;
-if (type.value == U64.value)
-return PointerType::U64;
-if (type.value == F64[1].value)
-return PointerType::F64;
-UNREACHABLE_MSG("Unknown type for pointer");
-}
-
-Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
+Id EmitDwordMemoryRead(Id address, auto&& fallback) {
 const Id available_label = OpLabel();
 const Id fallback_label = OpLabel();
 const Id merge_label = OpLabel();

@@ -185,10 +156,8 @@ public:
 
 // Available
 AddLabel(available_label);
-const auto pointer_type = PointerTypeFromType(type);
-const Id pointer_type_id = physical_pointer_types[pointer_type];
-const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
-const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
+const Id addr_ptr = OpConvertUToPtr(physical_pointer_type_u32, addr);
+const Id result = OpLoad(U32[1], addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
 OpBranch(merge_label);
 
 // Fallback

@@ -199,7 +168,7 @@ public:
 // Merge
 AddLabel(merge_label);
 const Id final_result =
-OpPhi(type, fallback_result, fallback_label, result, available_label);
+OpPhi(U32[1], fallback_result, fallback_label, result, available_label);
 return final_result;
 }

@@ -314,6 +283,24 @@ public:
 bool is_storage = false;
 };
 
+enum class PointerType : u32 {
+U8,
+U16,
+U32,
+F32,
+U64,
+F64,
+NumAlias,
+};
+
+enum class PointerSize : u32 {
+B8,
+B16,
+B32,
+B64,
+NumClass,
+};
+
 struct BufferSpv {
 Id id;
 Id pointer_type;

@@ -322,32 +309,23 @@ public:
 struct BufferDefinition {
 u32 binding;
 BufferType buffer_type;
-Id offset;
-Id offset_dwords;
-Id size;
-Id size_shorts;
-Id size_dwords;
-Id size_qwords;
+std::array<Id, u32(PointerSize::NumClass)> offsets;
+std::array<Id, u32(PointerSize::NumClass)> sizes;
 std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
 
-const BufferSpv& operator[](PointerType alias) const {
-return aliases[u32(alias)];
+template <class Self>
+auto& Alias(this Self& self, PointerType alias) {
+return self.aliases[u32(alias)];
 }
 
-BufferSpv& operator[](PointerType alias) {
-return aliases[u32(alias)];
-}
-};
-
-struct PhysicalPointerTypes {
-std::array<Id, u32(PointerType::NumAlias)> types;
-
-const Id& operator[](PointerType type) const {
-return types[u32(type)];
+template <class Self>
+auto& Offset(this Self& self, PointerSize size) {
+return self.offsets[u32(size)];
 }
 
-Id& operator[](PointerType type) {
-return types[u32(type)];
+template <class Self>
+auto& Size(this Self& self, PointerSize size) {
+return self.sizes[u32(size)];
 }
 };
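The new Alias()/Offset()/Size() accessors use C++23 "deducing this" (an explicit object parameter), so one member template replaces the old const and non-const operator[] pair. A standalone illustration of the pattern (not repository code; requires C++23):

```cpp
#include <array>

struct Registry {
    std::array<int, 4> slots{};

    // One template serves both const and non-const objects: Self deduces to
    // Registry or const Registry, and auto& becomes int& or const int&.
    template <class Self>
    auto& At(this Self& self, unsigned i) {
        return self.slots[i];
    }
};

int main() {
    Registry r;
    r.At(1) = 42;                  // writable through a non-const object
    const Registry& cr = r;
    return cr.At(1) == 42 ? 0 : 1; // read-only through a const reference
}
```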
@@ -356,12 +334,12 @@ public:
 boost::container::small_vector<BufferDefinition, 16> buffers;
 boost::container::small_vector<TextureDefinition, 8> images;
 boost::container::small_vector<Id, 4> samplers;
-PhysicalPointerTypes physical_pointer_types;
 std::unordered_map<u32, Id> first_to_last_label_map;
 
 size_t flatbuf_index{};
 size_t bda_pagetable_index{};
 size_t fault_buffer_index{};
+Id physical_pointer_type_u32;
 
 Id sampler_type{};
 Id sampler_pointer_type{};
@@ -1,7 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <bit>
 #include "common/assert.h"
 #include "shader_recompiler/frontend/translate/translate.h"
@@ -238,7 +238,7 @@ struct Info {
 Dynamic = 1 << 1,
 };
 ReadConstType readconst_types{};
-IR::Type dma_types{IR::Type::Void};
+bool uses_dma{false};
 
 explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
 : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@@ -105,6 +105,49 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
 }
 }
 
+u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
+switch (inst.GetOpcode()) {
+case IR::Opcode::LoadBufferU8:
+case IR::Opcode::StoreBufferU8:
+return 0;
+case IR::Opcode::LoadBufferU16:
+case IR::Opcode::StoreBufferU16:
+return 1;
+case IR::Opcode::LoadBufferU64:
+case IR::Opcode::StoreBufferU64:
+case IR::Opcode::BufferAtomicIAdd64:
+return 3;
+case IR::Opcode::LoadBufferFormatF32:
+case IR::Opcode::StoreBufferFormatF32: {
+switch (data_format) {
+case AmdGpu::DataFormat::Format8:
+return 0;
+case AmdGpu::DataFormat::Format8_8:
+case AmdGpu::DataFormat::Format16:
+return 1;
+case AmdGpu::DataFormat::Format8_8_8_8:
+case AmdGpu::DataFormat::Format16_16:
+case AmdGpu::DataFormat::Format10_11_11:
+case AmdGpu::DataFormat::Format2_10_10_10:
+case AmdGpu::DataFormat::Format16_16_16_16:
+case AmdGpu::DataFormat::Format32:
+case AmdGpu::DataFormat::Format32_32:
+case AmdGpu::DataFormat::Format32_32_32:
+case AmdGpu::DataFormat::Format32_32_32_32:
+return 2;
+default:
+return 0;
+}
+break;
+}
+case IR::Opcode::ReadConstBuffer:
+// Provided address is already in dwords
+return 0;
+default:
+return 2;
+}
+}
+
 bool IsImageAtomicInstruction(const IR::Inst& inst) {
 switch (inst.GetOpcode()) {
 case IR::Opcode::ImageAtomicIAdd32:
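For byte-addressed accesses, the shift returned by BufferAddressShift above is simply log2 of the element size in bytes (ReadConstBuffer is the exception, since its address already arrives in dwords). A tiny sanity-check sketch, illustrative only:

```cpp
#include <cstdint>

constexpr uint32_t ShiftForElementBytes(uint32_t bytes) {
    uint32_t shift = 0;
    while ((1u << shift) < bytes) {
        ++shift;
    }
    return shift;
}

static_assert(ShiftForElementBytes(1) == 0); // U8 accesses
static_assert(ShiftForElementBytes(2) == 1); // U16, Format8_8
static_assert(ShiftForElementBytes(4) == 2); // U32, Format16_16, Format32
static_assert(ShiftForElementBytes(8) == 3); // U64 accesses and 64-bit atomics
```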
@@ -496,6 +539,22 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
 IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
 const AmdGpu::Buffer& buffer, u32 stride) {
 const auto inst_info = inst.Flags<IR::BufferInstInfo>();
+const u32 inst_offset = inst_info.inst_offset.Value();
+const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
+const auto data_format = is_inst_typed
+? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
+: buffer.GetDataFmt();
+const u32 shift = BufferAddressShift(inst, data_format);
+const u32 mask = (1 << shift) - 1;
+
+// If address calculation is of the form "index * const_stride + offset" with offset constant
+// and both const_stride and offset are divisible with the element size, apply shift directly.
+if (inst_info.index_enable && !inst_info.offset_enable && !buffer.swizzle_enable &&
+!buffer.add_tid_enable && (stride & mask) == 0 && (inst_offset & mask) == 0) {
+// buffer_offset = index * (const_stride >> shift) + (inst_offset >> shift)
+const IR::U32 index = IR::U32{inst.Arg(1)};
+return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), ir.Imm32(inst_offset >> shift));
+}
 
 // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0)
 IR::U32 index = ir.Imm32(0U);
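A worked example of the fast path described above, with assumed numbers: 4-byte elements give shift = 2 and mask = 3; a stride of 16 and an instruction offset of 32 are both multiples of 4, so the shift folds into the constants and no run-time shift is emitted.

```cpp
#include <cstdint>

int main() {
    const uint32_t shift = 2, stride = 16, inst_offset = 32, index = 5;
    const uint32_t byte_form = (index * stride + inst_offset) >> shift;         // 112 >> 2 = 28
    const uint32_t folded = index * (stride >> shift) + (inst_offset >> shift); // 5 * 4 + 8 = 28
    return byte_form == folded ? 0 : 1;
}
```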
@@ -512,7 +571,7 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
 index = ir.IAdd(index, thread_id);
 }
 // offset = (inst_offen ? vgpr_offset : 0) + inst_offset
-IR::U32 offset = ir.Imm32(inst_info.inst_offset.Value());
+IR::U32 offset = ir.Imm32(inst_offset);
 if (inst_info.offset_enable) {
 const IR::U32 vgpr_offset = inst_info.index_enable
 ? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)}

@@ -545,6 +604,9 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
 // buffer_offset = index * const_stride + offset
 buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
 }
+if (shift != 0) {
+buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
+}
 return buffer_offset;
 }
@@ -102,7 +102,7 @@ void Visit(Info& info, const IR::Inst& inst) {
 info.uses_lane_id = true;
 break;
 case IR::Opcode::ReadConst:
-if (info.readconst_types == Info::ReadConstType::None) {
+if (!info.uses_dma) {
 info.buffers.push_back({
 .used_types = IR::Type::U32,
 // We can't guarantee that flatbuf will not grow past UBO

@@ -116,7 +116,7 @@ void Visit(Info& info, const IR::Inst& inst) {
 } else {
 info.readconst_types |= Info::ReadConstType::Dynamic;
 }
-info.dma_types |= IR::Type::U32;
+info.uses_dma = true;
 break;
 case IR::Opcode::PackUfloat10_11_11:
 info.uses_pack_10_11_11 = true;

@@ -130,21 +130,22 @@ void Visit(Info& info, const IR::Inst& inst) {
 }
 
 void CollectShaderInfoPass(IR::Program& program) {
+auto& info = program.info;
 for (IR::Block* const block : program.post_order_blocks) {
 for (IR::Inst& inst : block->Instructions()) {
-Visit(program.info, inst);
+Visit(info, inst);
 }
 }
 
-if (program.info.dma_types != IR::Type::Void) {
-program.info.buffers.push_back({
+if (info.uses_dma) {
+info.buffers.push_back({
 .used_types = IR::Type::U64,
 .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::BDA_PAGETABLE_SIZE),
 .buffer_type = BufferType::BdaPagetable,
 .is_written = true,
 });
-program.info.buffers.push_back({
-.used_types = IR::Type::U8,
+info.buffers.push_back({
+.used_types = IR::Type::U32,
 .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
 .buffer_type = BufferType::FaultBuffer,
 .is_written = true,
@@ -35,7 +35,7 @@ struct Profile {
 bool lower_left_origin_mode{};
 bool needs_manual_interpolation{};
 bool needs_lds_barriers{};
-u64 min_ssbo_alignment{};
+bool needs_buffer_offsets{};
 u64 max_ubo_size{};
 u32 max_viewport_width{};
 u32 max_viewport_height{};
@@ -225,6 +225,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
 instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
 .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
 instance.GetDriverID() == vk::DriverId::eMoltenvk,
+.needs_buffer_offsets = instance.StorageMinAlignment() > 4,
 // When binding a UBO, we calculate its size considering the offset in the larger buffer
 // cache underlying resource. In some cases, it may produce sizes exceeding the system
 // maximum allowed UBO range, so we need to reduce the threshold to prevent issues.
@@ -468,17 +468,12 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
 stage->PushUd(binding, push_data);
 BindBuffers(*stage, binding, push_data);
 BindTextures(*stage, binding);
 
-uses_dma |= stage->dma_types != Shader::IR::Type::Void;
+uses_dma |= stage->uses_dma;
 }
 
-pipeline->BindResources(set_writes, buffer_barriers, push_data);
-
 if (uses_dma && !fault_process_pending) {
 // We only use fault buffer for DMA right now.
 {
 // TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP)
 // we need to account for that and synchronize.
 Common::RecursiveSharedLock lock{mapped_ranges_mutex};
 for (auto& range : mapped_ranges) {
 buffer_cache.SynchronizeBuffersInRange(range.lower(),

@@ -490,6 +485,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
 
 fault_process_pending |= uses_dma;
 
+pipeline->BindResources(set_writes, buffer_barriers, push_data);
+
 return true;
 }