Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-07-13 05:05:57 +00:00
shader_recompiler: Implement AMD buffer bounds checking behavior. (#2448)
* shader_recompiler: Implement AMD buffer bounds checking behavior.
* shader_recompiler: Use SRT flatbuf for bounds check size.
* shader_recompiler: Fix buffer atomic bounds check.
* buffer_cache: Prevent false image-to-buffer sync. Lowering vertex fetch to formatted buffer surfaced an issue where a CPU modified range may be overwritten with stale GPU modified image data.
* Address review comments.
This commit is contained in:
parent b06790dfe5
commit fd3d3c4158
19 changed files with 376 additions and 158 deletions
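Context for the change (editor's summary, not text from the commit): on GCN hardware, buffer loads whose index falls outside the V# range return zero and out-of-bounds stores are dropped, and the hunks below reproduce that with explicit compare/select or branch sequences whenever robustBufferAccess support cannot be relied on. A rough host-side C++ sketch of the semantics being emulated, using a hypothetical GuestBuffer type that is not part of the commit:

#include <cstdint>
#include <vector>
using u32 = std::uint32_t;

// Hypothetical illustration of the hardware behavior being emulated; this type
// and its methods are not emitter code.
struct GuestBuffer {
    std::vector<u32> dwords; // size derived from the V#, see GetBufferSize further below

    u32 Load(u32 index) const {
        // Out-of-bounds reads return zero rather than faulting.
        return index < dwords.size() ? dwords[index] : 0u;
    }

    void Store(u32 index, u32 value) {
        // Out-of-bounds writes are silently dropped.
        if (index < dwords.size()) {
            dwords[index] = value;
        }
    }
};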
@@ -21,6 +21,28 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
     return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
 }
 
+Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a conditional branch to make sure that
+        // the atomic is not mistakenly executed when the index is out of bounds.
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
+        const Id ib_label = ctx.OpLabel();
+        const Id oob_label = ctx.OpLabel();
+        const Id end_label = ctx.OpLabel();
+        ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
+        ctx.AddLabel(ib_label);
+        const Id ib_result = emit_func();
+        ctx.OpBranch(end_label);
+        ctx.AddLabel(oob_label);
+        const Id oob_result = ctx.u32_zero_value;
+        ctx.OpBranch(end_label);
+        ctx.AddLabel(end_label);
+        return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
+    }
+    // Bounds checking not enabled, just perform the atomic operation.
+    return emit_func();
+}
+
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
@@ -31,7 +53,9 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
+    return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
+    });
 }
 
 Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
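The BufferAtomicU32BoundsCheck helper added above guards the atomic with a conditional branch and merges the two paths with an OpPhi, so an out-of-bounds atomic neither executes nor produces garbage. In structured C++ terms the emitted control flow behaves roughly like the following hedged sketch (BufferAtomicChecked and emit_atomic are illustrative names, not emitter API):

#include <cstdint>
using u32 = std::uint32_t;

// Sketch only: the value merging done by OpPhi is modelled with a local variable.
u32 BufferAtomicChecked(u32 index, u32 size_dwords, auto emit_atomic) {
    u32 result = 0u;            // out-of-bounds path contributes u32_zero_value
    if (index < size_dwords) {  // OpULessThan + OpBranchConditional
        result = emit_atomic(); // in-bounds label runs the real atomic op
    }
    return result;              // OpPhi selects between the two predecessors
}

Callers wrap the actual atomic emission in the lambda, just as BufferAtomicU32 does above with buffer.size_dwords as the limit.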
@@ -178,14 +178,21 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
         index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
     const auto [id, pointer_type] = buffer[BufferAlias::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    return ctx.OpLoad(ctx.U32[1], ptr);
+    const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
+
+    if (Sirit::ValidId(buffer.size_dwords)) {
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
+        return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
+    } else {
+        return result;
+    }
 }
 
 Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
+    const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
     return ctx.OpLoad(
         ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
-                                      ctx.push_data_block,
-                                      rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
+                                      ctx.push_data_block, ctx.ConstU32(index)));
 }
 
 static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
@@ -402,8 +409,30 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
     ctx.OpStore(pointer, value);
 }
 
+template <u32 N>
+static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
+                                    bool is_float) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a select.
+        const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
+        auto compare_index = index;
+        auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
+        if (N > 1) {
+            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
+            std::array<Id, N> zero_ids;
+            zero_ids.fill(zero_value);
+            zero_value = ctx.ConstantComposite(result_type, zero_ids);
+        }
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
+        return ctx.OpSelect(result_type, in_bounds, result, zero_value);
+    }
+    // Bounds checking not enabled, just return the plain value.
+    return result;
+}
+
 template <u32 N, BufferAlias alias>
-static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
+static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@@ -411,31 +440,42 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
-    if constexpr (N == 1) {
-        const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(data_types[1], ptr);
-    } else {
-        boost::container::static_vector<Id, N> ids;
-        for (u32 i = 0; i < N; i++) {
-            const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i)};
-            ids.push_back(ctx.OpLoad(data_types[1], ptr));
-        }
-        return ctx.OpCompositeConstruct(data_types[N], ids);
-    }
+
+    boost::container::static_vector<Id, N> ids;
+    for (u32 i = 0; i < N; i++) {
+        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+        const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
+        if (!flags.typed) {
+            // Untyped loads have bounds checking per-component.
+            ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
+                                                       result_i, alias == BufferAlias::F32));
+        } else {
+            ids.push_back(result_i);
+        }
+    }
+
+    const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
+    if (flags.typed) {
+        // Typed loads have single bounds check for the whole load.
+        return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
+                                            alias == BufferAlias::F32);
+    }
+    return result;
 }
 
-Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
+Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
     const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr));
+    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
+    return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
 }
 
-Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
+Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
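As the comments in the hunk above note, untyped (raw) loads are checked per dword component while typed loads get one check that covers the whole fetch against the last component's index. A hedged C++ sketch of the difference in result values, using hypothetical helpers rather than emitter API:

#include <array>
#include <cstdint>
using u32 = std::uint32_t;

// Hypothetical stand-in for a single bounds-checked dword load; not emitter code.
static u32 read_dword(const u32* data, u32 size_dwords, u32 index) {
    return index < size_dwords ? data[index] : 0u;
}

// Untyped load of N dwords: each component is checked on its own, so a partially
// out-of-bounds fetch returns zeros only for the tail components.
template <u32 N>
std::array<u32, N> load_untyped(const u32* data, u32 size_dwords, u32 index) {
    std::array<u32, N> out{};
    for (u32 i = 0; i < N; ++i) {
        out[i] = read_dword(data, size_dwords, index + i);
    }
    return out;
}

// Typed load of N dwords: one check against index + N - 1, so a partially
// out-of-bounds fetch returns all zeros.
template <u32 N>
std::array<u32, N> load_typed(const u32* data, u32 size_dwords, u32 index) {
    std::array<u32, N> out{};
    if (index + N - 1 < size_dwords) {
        for (u32 i = 0; i < N; ++i) {
            out[i] = data[index + i];
        }
    }
    return out;
}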
@@ -443,47 +483,73 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
     const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr));
+    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
+    return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
 }
 
-Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
-Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
-Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
-Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     UNREACHABLE_MSG("SPIR-V instruction");
 }
 
+template <u32 N>
+void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a conditional branch.
+        auto compare_index = index;
+        if (N > 1) {
+            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
+        }
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
+        const Id in_bounds_label = ctx.OpLabel();
+        const Id merge_label = ctx.OpLabel();
+        ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+        ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
+        ctx.AddLabel(in_bounds_label);
+        emit_func();
+        ctx.OpBranch(merge_label);
+        ctx.AddLabel(merge_label);
+        return;
+    }
+    // Bounds checking not enabled, just perform the store.
+    emit_func();
+}
+
 template <u32 N, BufferAlias alias>
-static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
+static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
+                                 Id value) {
+    const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@@ -491,15 +557,27 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id va
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
-    if constexpr (N == 1) {
-        const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-        ctx.OpStore(ptr, value);
-    } else {
+
+    auto store = [&] {
         for (u32 i = 0; i < N; i++) {
-            const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-            ctx.OpStore(ptr, ctx.OpCompositeExtract(data_types[1], value, i));
+            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
+            auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
+            if (!flags.typed) {
+                // Untyped stores have bounds checking per-component.
+                EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
+            } else {
+                store_i();
+            }
         }
-    }
+    };
+
+    if (flags.typed) {
+        // Typed stores have single bounds check for the whole store.
+        EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
+    } else {
+        store();
+    }
 }
 
@@ -510,7 +588,8 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
     }
     const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    ctx.OpStore(ptr, ctx.OpUConvert(ctx.U8, value));
+    const Id result{ctx.OpUConvert(ctx.U8, value)};
+    EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
 }
 
 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
@@ -521,39 +600,41 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
     const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    ctx.OpStore(ptr, ctx.OpUConvert(ctx.U16, value));
+    const Id result{ctx.OpUConvert(ctx.U16, value)};
+    EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
+                                  [&] { ctx.OpStore(ptr, result); });
 }
 
-void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, handle, address, value);
+void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
 }
 
-void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, handle, address, value);
+void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
 }
 
-void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, handle, address, value);
+void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
 }
 
-void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, handle, address, value);
+void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
@@ -14,7 +14,7 @@ void EmitPrologue(EmitContext& ctx) {
     if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
        ctx.DefineWorkgroupIndex();
     }
-    ctx.DefineBufferOffsets();
+    ctx.DefineBufferProperties();
 }
 
 void ConvertDepthMode(EmitContext& ctx) {
@@ -192,8 +192,27 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
     UNREACHABLE_MSG("Invalid attribute type {}", fmt);
 }
 
-void EmitContext::DefineBufferOffsets() {
-    for (BufferDefinition& buffer : buffers) {
+Id EmitContext::GetBufferSize(const u32 sharp_idx) {
+    const auto& srt_flatbuf = buffers.back();
+    ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
+    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+
+    const auto rsrc1{
+        OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
+    const auto rsrc2{
+        OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 2)))};
+
+    const auto stride{OpBitFieldUExtract(U32[1], rsrc1, ConstU32(16u), ConstU32(14u))};
+    const auto num_records{rsrc2};
+
+    const auto stride_zero{OpIEqual(U1[1], stride, u32_zero_value)};
+    const auto stride_size{OpIMul(U32[1], num_records, stride)};
+    return OpSelect(U32[1], stride_zero, num_records, stride_size);
+}
+
+void EmitContext::DefineBufferProperties() {
+    for (u32 i = 0; i < buffers.size(); i++) {
+        BufferDefinition& buffer = buffers[i];
         if (buffer.buffer_type != BufferType::Guest) {
             continue;
         }
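The size computed by GetBufferSize above follows the extraction in the hunk: the stride comes from bits 16..29 of the V#'s second dword, num_records from the third dword, and the byte size is num_records when the stride is zero, otherwise num_records * stride. A small host-side sketch of the same arithmetic (BufferSizeBytes is a hypothetical helper, not part of the commit):

#include <cstdint>
using u32 = std::uint32_t;

// Hypothetical host-side equivalent of GetBufferSize(): extract the 14-bit STRIDE
// field from the second V# dword and combine it with NUM_RECORDS (third dword).
u32 BufferSizeBytes(u32 rsrc1, u32 rsrc2) {
    const u32 stride = (rsrc1 >> 16) & 0x3FFFu;
    const u32 num_records = rsrc2;
    return stride == 0 ? num_records : num_records * stride;
}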
@@ -208,6 +227,22 @@ void EmitContext::DefineBufferOffsets() {
         Name(buffer.offset, fmt::format("buf{}_off", binding));
         buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
         Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
+
+        // Only need to load size if performing bounds checks and the buffer is both guest and not
+        // inline.
+        if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
+            const BufferResource& desc = info.buffers[i];
+            if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
+                buffer.size = ConstU32(desc.inline_cbuf.GetSize());
+            } else {
+                buffer.size = GetBufferSize(desc.sharp_idx);
+            }
+            Name(buffer.size, fmt::format("buf{}_size", binding));
+            buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
+            Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
+            buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
+            Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
+        }
     }
 }
 
@@ -589,34 +624,34 @@ void EmitContext::DefineOutputs() {
 
 void EmitContext::DefinePushDataBlock() {
     // Create push constants block for instance steps rates
-    const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4], U32[4],
-                                         U32[4], F32[1], F32[1], F32[1], F32[1]),
+    const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
+                                         U32[4], U32[4], U32[4], U32[4], U32[4]),
                               "AuxData")};
     Decorate(struct_type, spv::Decoration::Block);
-    MemberName(struct_type, 0, "sr0");
-    MemberName(struct_type, 1, "sr1");
-    MemberName(struct_type, Shader::PushData::BufOffsetIndex + 0, "buf_offsets0");
-    MemberName(struct_type, Shader::PushData::BufOffsetIndex + 1, "buf_offsets1");
-    MemberName(struct_type, Shader::PushData::UdRegsIndex + 0, "ud_regs0");
-    MemberName(struct_type, Shader::PushData::UdRegsIndex + 1, "ud_regs1");
-    MemberName(struct_type, Shader::PushData::UdRegsIndex + 2, "ud_regs2");
-    MemberName(struct_type, Shader::PushData::UdRegsIndex + 3, "ud_regs3");
-    MemberName(struct_type, Shader::PushData::XOffsetIndex, "xoffset");
-    MemberName(struct_type, Shader::PushData::YOffsetIndex, "yoffset");
-    MemberName(struct_type, Shader::PushData::XScaleIndex, "xscale");
-    MemberName(struct_type, Shader::PushData::YScaleIndex, "yscale");
-    MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
-    MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
-    MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 8U);
-    MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 24U);
-    MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 0, spv::Decoration::Offset, 40U);
-    MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 1, spv::Decoration::Offset, 56U);
-    MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 2, spv::Decoration::Offset, 72U);
-    MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 3, spv::Decoration::Offset, 88U);
-    MemberDecorate(struct_type, Shader::PushData::XOffsetIndex, spv::Decoration::Offset, 104U);
-    MemberDecorate(struct_type, Shader::PushData::YOffsetIndex, spv::Decoration::Offset, 108U);
-    MemberDecorate(struct_type, Shader::PushData::XScaleIndex, spv::Decoration::Offset, 112U);
-    MemberDecorate(struct_type, Shader::PushData::YScaleIndex, spv::Decoration::Offset, 116U);
+    MemberName(struct_type, PushData::Step0Index, "sr0");
+    MemberName(struct_type, PushData::Step1Index, "sr1");
+    MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
+    MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
+    MemberName(struct_type, PushData::XScaleIndex, "xscale");
+    MemberName(struct_type, PushData::YScaleIndex, "yscale");
+    MemberName(struct_type, PushData::UdRegsIndex + 0, "ud_regs0");
+    MemberName(struct_type, PushData::UdRegsIndex + 1, "ud_regs1");
+    MemberName(struct_type, PushData::UdRegsIndex + 2, "ud_regs2");
+    MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
+    MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
+    MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
+    MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
+    MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
+    MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
+    MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U);
+    MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U);
+    MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
+    MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
+    MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
     push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
     Name(push_data_block, "push_data");
     interfaces.push_back(push_data_block);
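For reference, the MemberDecorate offsets above describe a 120-byte push-constant block with the scalar step rates and viewport transform first, then the wide vectors. A host-side mirror with the same layout might look like the following sketch (PushDataSketch is an illustrative name and not the project's actual PushData declaration):

#include <array>
#include <cstdint>
using u32 = std::uint32_t;

// Hypothetical mirror of the "AuxData" block laid out above. Member offsets match
// the MemberDecorate values: 0, 4, 8, 12, 16, 20, 24, 40, 56, 72, 88, 104.
struct PushDataSketch {
    u32 sr0;                                    // offset 0
    u32 sr1;                                    // offset 4
    float xoffset;                              // offset 8
    float yoffset;                              // offset 12
    float xscale;                               // offset 16
    float yscale;                               // offset 20
    std::array<std::array<u32, 4>, 4> ud_regs;  // offsets 24, 40, 56, 72
    std::array<std::array<u32, 4>, 2> buf_offsets; // offsets 88, 104
};
static_assert(sizeof(PushDataSketch) == 120);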
@@ -661,12 +696,22 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
         break;
     default:
         Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer));
         break;
     }
     interfaces.push_back(id);
     return {id, pointer_type};
 };
 
 void EmitContext::DefineBuffers() {
+    if (!profile.supports_robust_buffer_access && !info.has_readconst) {
+        // In case ReadConstUbo has not already been bound by IR and is needed
+        // to query buffer sizes, bind it now.
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::ReadConstUbo,
+        });
+    }
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
@@ -43,7 +43,7 @@ public:
 
     Id Def(const IR::Value& value);
 
-    void DefineBufferOffsets();
+    void DefineBufferProperties();
     void DefineInterpolatedAttribs();
     void DefineWorkgroupIndex();
 
@@ -248,6 +248,9 @@ public:
         BufferType buffer_type;
         Id offset;
         Id offset_dwords;
+        Id size;
+        Id size_shorts;
+        Id size_dwords;
         std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
 
         const BufferSpv& operator[](BufferAlias alias) const {
@@ -307,6 +310,8 @@ private:
 
     Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
     Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
+
+    Id GetBufferSize(u32 sharp_idx);
 };
 
 } // namespace Shader::Backend::SPIRV