shader_recompiler: Implement AMD buffer bounds checking behavior. (#2448)

* shader_recompiler: Implement AMD buffer bounds checking behavior.

* shader_recompiler: Use SRT flatbuf for bounds check size.

* shader_recompiler: Fix buffer atomic bounds check.

* buffer_cache: Prevent false image-to-buffer sync.

Lowering vertex fetch to formatted buffer loads surfaced an issue where a CPU-modified range could be overwritten with stale GPU-modified image data.

* Address review comments.
squidbus 2025-02-17 06:13:39 -08:00 committed by GitHub
parent b06790dfe5
commit fd3d3c4158
19 changed files with 376 additions and 158 deletions
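As context for the diff below: these changes emulate AMD's buffer range checking when the Vulkan robustBufferAccess2 feature is unavailable. A rough scalar model of the behavior being emulated (illustrative names only, not emulator code):

#include <cstdint>

// Out-of-bounds reads return zero, writes are dropped, and atomics are
// skipped (yielding zero), matching the SPIR-V bounds checks emitted below.
uint32_t LoadDword(const uint32_t* buf, uint32_t size_dwords, uint32_t index) {
    return index < size_dwords ? buf[index] : 0u;
}

void StoreDword(uint32_t* buf, uint32_t size_dwords, uint32_t index, uint32_t value) {
    if (index < size_dwords) {
        buf[index] = value;
    }
}

uint32_t AtomicAddDword(uint32_t* buf, uint32_t size_dwords, uint32_t index, uint32_t value) {
    if (index >= size_dwords) {
        return 0u; // Atomic is not executed when out of bounds.
    }
    const uint32_t prev = buf[index];
    buf[index] += value;
    return prev;
}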

View file

@ -21,6 +21,28 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
} }
Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a conditional branch to make sure that
// the atomic is not mistakenly executed when the index is out of bounds.
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
const Id ib_label = ctx.OpLabel();
const Id oob_label = ctx.OpLabel();
const Id end_label = ctx.OpLabel();
ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
ctx.AddLabel(ib_label);
const Id ib_result = emit_func();
ctx.OpBranch(end_label);
ctx.AddLabel(oob_label);
const Id oob_result = ctx.u32_zero_value;
ctx.OpBranch(end_label);
ctx.AddLabel(end_label);
return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
}
// Bounds checking not enabled, just perform the atomic operation.
return emit_func();
}
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
@ -31,7 +53,9 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32]; const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index); const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value); return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
});
} }
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,

View file

@ -178,14 +178,21 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const auto [id, pointer_type] = buffer[BufferAlias::U32]; const auto [id, pointer_type] = buffer[BufferAlias::U32];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
return ctx.OpLoad(ctx.U32[1], ptr); const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
if (Sirit::ValidId(buffer.size_dwords)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
} else {
return result;
}
} }
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
return ctx.OpLoad( return ctx.OpLoad(
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.push_data_block, ctx.push_data_block, ctx.ConstU32(index)));
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
} }
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
@ -402,8 +409,30 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
ctx.OpStore(pointer, value); ctx.OpStore(pointer, value);
} }
template <u32 N>
static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
bool is_float) {
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a select.
const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
auto compare_index = index;
auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
if (N > 1) {
compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
std::array<Id, N> zero_ids;
zero_ids.fill(zero_value);
zero_value = ctx.ConstantComposite(result_type, zero_ids);
}
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
return ctx.OpSelect(result_type, in_bounds, result, zero_value);
}
// Bounds checking not enabled, just return the plain value.
return result;
}
template <u32 N, BufferAlias alias> template <u32 N, BufferAlias alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) { static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@ -411,31 +440,42 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32; const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias]; const auto [id, pointer_type] = spv_buffer[alias];
if constexpr (N == 1) {
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
return ctx.OpLoad(data_types[1], ptr);
} else {
boost::container::static_vector<Id, N> ids; boost::container::static_vector<Id, N> ids;
for (u32 i = 0; i < N; i++) { for (u32 i = 0; i < N; i++) {
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i)}; const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
ids.push_back(ctx.OpLoad(data_types[1], ptr)); const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
} if (!flags.typed) {
return ctx.OpCompositeConstruct(data_types[N], ids); // Untyped loads have bounds checking per-component.
ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
result_i, alias == BufferAlias::F32));
} else {
ids.push_back(result_i);
} }
} }
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
if (flags.typed) {
// Typed loads have a single bounds check for the whole load.
return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
alias == BufferAlias::F32);
}
return result;
}
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
} }
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8]; const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr)); const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
} }
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@ -443,47 +483,73 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16]; const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u)); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr)); const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
} }
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, handle, address); return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, handle, address); return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, handle, address); return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, handle, address); return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, handle, address); return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, handle, address); return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, handle, address); return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, handle, address); return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
} }
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
UNREACHABLE_MSG("SPIR-V instruction"); UNREACHABLE_MSG("SPIR-V instruction");
} }
template <u32 N>
void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a conditional branch.
auto compare_index = index;
if (N > 1) {
compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
}
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
const Id in_bounds_label = ctx.OpLabel();
const Id merge_label = ctx.OpLabel();
ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
ctx.AddLabel(in_bounds_label);
emit_func();
ctx.OpBranch(merge_label);
ctx.AddLabel(merge_label);
return;
}
// Bounds checking not enabled, just perform the store.
emit_func();
}
template <u32 N, BufferAlias alias> template <u32 N, BufferAlias alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id value) { static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@ -491,16 +557,28 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id va
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32; const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias]; const auto [id, pointer_type] = spv_buffer[alias];
if constexpr (N == 1) {
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; auto store = [&] {
ctx.OpStore(ptr, value);
} else {
for (u32 i = 0; i < N; i++) { for (u32 i = 0; i < N; i++) {
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i); const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
ctx.OpStore(ptr, ctx.OpCompositeExtract(data_types[1], value, i)); const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
if (!flags.typed) {
// Untyped stores have bounds checking per-component.
EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
} else {
store_i();
} }
} }
};
if (flags.typed) {
// Typed stores have a single bounds check for the whole store.
EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
} else {
store();
}
} }
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
@ -510,7 +588,8 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
} }
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8]; const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
ctx.OpStore(ptr, ctx.OpUConvert(ctx.U8, value)); const Id result{ctx.OpUConvert(ctx.U8, value)};
EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
} }
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
@ -521,39 +600,41 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16]; const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u)); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
ctx.OpStore(ptr, ctx.OpUConvert(ctx.U16, value)); const Id result{ctx.OpUConvert(ctx.U16, value)};
EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
[&] { ctx.OpStore(ptr, result); });
} }
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, handle, address, value); EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, handle, address, value); EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, handle, address, value); EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, handle, address, value); EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, handle, address, value); EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, handle, address, value); EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, handle, address, value); EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, handle, address, value); EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
} }
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
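One detail worth noting in the load/store paths above: typed (format) accesses get a single range check covering the whole fetch, while untyped accesses are checked per component. A hedged scalar illustration of the difference (function names are illustrative; size_dwords is the sharp-derived size used above):

#include <array>
#include <cstdint>

// Untyped x4 load: each dword component is checked independently.
std::array<uint32_t, 4> LoadX4Untyped(const uint32_t* buf, uint32_t size_dwords, uint32_t index) {
    std::array<uint32_t, 4> out{};
    for (uint32_t i = 0; i < 4; ++i) {
        out[i] = (index + i) < size_dwords ? buf[index + i] : 0u;
    }
    return out;
}

// Typed x4 load: one check against the last component's index decides
// whether the whole result is read or zeroed.
std::array<uint32_t, 4> LoadX4Typed(const uint32_t* buf, uint32_t size_dwords, uint32_t index) {
    std::array<uint32_t, 4> out{};
    if (index + 3 < size_dwords) {
        for (uint32_t i = 0; i < 4; ++i) {
            out[i] = buf[index + i];
        }
    }
    return out;
}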

View file

@ -14,7 +14,7 @@ void EmitPrologue(EmitContext& ctx) {
if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) { if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
ctx.DefineWorkgroupIndex(); ctx.DefineWorkgroupIndex();
} }
ctx.DefineBufferOffsets(); ctx.DefineBufferProperties();
} }
void ConvertDepthMode(EmitContext& ctx) { void ConvertDepthMode(EmitContext& ctx) {

View file

@ -192,8 +192,27 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
UNREACHABLE_MSG("Invalid attribute type {}", fmt); UNREACHABLE_MSG("Invalid attribute type {}", fmt);
} }
void EmitContext::DefineBufferOffsets() { Id EmitContext::GetBufferSize(const u32 sharp_idx) {
for (BufferDefinition& buffer : buffers) { const auto& srt_flatbuf = buffers.back();
ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
const auto rsrc2{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 2)))};
const auto stride{OpBitFieldUExtract(U32[1], rsrc1, ConstU32(16u), ConstU32(14u))};
const auto num_records{rsrc2};
const auto stride_zero{OpIEqual(U1[1], stride, u32_zero_value)};
const auto stride_size{OpIMul(U32[1], num_records, stride)};
return OpSelect(U32[1], stride_zero, num_records, stride_size);
}
void EmitContext::DefineBufferProperties() {
for (u32 i = 0; i < buffers.size(); i++) {
BufferDefinition& buffer = buffers[i];
if (buffer.buffer_type != BufferType::Guest) { if (buffer.buffer_type != BufferType::Guest) {
continue; continue;
} }
@ -208,6 +227,22 @@ void EmitContext::DefineBufferOffsets() {
Name(buffer.offset, fmt::format("buf{}_off", binding)); Name(buffer.offset, fmt::format("buf{}_off", binding));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding)); Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
// Only need to load size if performing bounds checks and the buffer is both guest and not
// inline.
if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
const BufferResource& desc = info.buffers[i];
if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
buffer.size = ConstU32(desc.inline_cbuf.GetSize());
} else {
buffer.size = GetBufferSize(desc.sharp_idx);
}
Name(buffer.size, fmt::format("buf{}_size", binding));
buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
}
} }
} }
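The size used in these checks is derived from the guest V# sharp, read back through the SRT flatbuffer UBO. A host-side sketch of the same computation GetBufferSize performs in SPIR-V (parameter names are illustrative):

#include <cstdint>

// rsrc_word1 is the second dword of the V# (stride in bits [29:16]),
// num_records is the third dword.
uint32_t BufferSizeBytes(uint32_t rsrc_word1, uint32_t num_records) {
    const uint32_t stride = (rsrc_word1 >> 16) & 0x3FFFu;
    // stride == 0: num_records already counts bytes; otherwise bytes = num_records * stride.
    return stride == 0 ? num_records : num_records * stride;
}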
@ -589,34 +624,34 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() { void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance steps rates // Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4], U32[4], const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
U32[4], F32[1], F32[1], F32[1], F32[1]), U32[4], U32[4], U32[4], U32[4], U32[4]),
"AuxData")}; "AuxData")};
Decorate(struct_type, spv::Decoration::Block); Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0"); MemberName(struct_type, PushData::Step0Index, "sr0");
MemberName(struct_type, 1, "sr1"); MemberName(struct_type, PushData::Step1Index, "sr1");
MemberName(struct_type, Shader::PushData::BufOffsetIndex + 0, "buf_offsets0"); MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
MemberName(struct_type, Shader::PushData::BufOffsetIndex + 1, "buf_offsets1"); MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
MemberName(struct_type, Shader::PushData::UdRegsIndex + 0, "ud_regs0"); MemberName(struct_type, PushData::XScaleIndex, "xscale");
MemberName(struct_type, Shader::PushData::UdRegsIndex + 1, "ud_regs1"); MemberName(struct_type, PushData::YScaleIndex, "yscale");
MemberName(struct_type, Shader::PushData::UdRegsIndex + 2, "ud_regs2"); MemberName(struct_type, PushData::UdRegsIndex + 0, "ud_regs0");
MemberName(struct_type, Shader::PushData::UdRegsIndex + 3, "ud_regs3"); MemberName(struct_type, PushData::UdRegsIndex + 1, "ud_regs1");
MemberName(struct_type, Shader::PushData::XOffsetIndex, "xoffset"); MemberName(struct_type, PushData::UdRegsIndex + 2, "ud_regs2");
MemberName(struct_type, Shader::PushData::YOffsetIndex, "yoffset"); MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
MemberName(struct_type, Shader::PushData::XScaleIndex, "xscale"); MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
MemberName(struct_type, Shader::PushData::YScaleIndex, "yscale"); MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 8U); MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 24U); MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U);
MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 0, spv::Decoration::Offset, 40U); MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U);
MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 1, spv::Decoration::Offset, 56U); MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U);
MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 2, spv::Decoration::Offset, 72U); MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U);
MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 3, spv::Decoration::Offset, 88U); MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U);
MemberDecorate(struct_type, Shader::PushData::XOffsetIndex, spv::Decoration::Offset, 104U); MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U);
MemberDecorate(struct_type, Shader::PushData::YOffsetIndex, spv::Decoration::Offset, 108U); MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
MemberDecorate(struct_type, Shader::PushData::XScaleIndex, spv::Decoration::Offset, 112U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
MemberDecorate(struct_type, Shader::PushData::YScaleIndex, spv::Decoration::Offset, 116U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data"); Name(push_data_block, "push_data");
interfaces.push_back(push_data_block); interfaces.push_back(push_data_block);
@ -661,12 +696,22 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
break; break;
default: default:
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer)); Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer));
break;
} }
interfaces.push_back(id); interfaces.push_back(id);
return {id, pointer_type}; return {id, pointer_type};
}; };
void EmitContext::DefineBuffers() { void EmitContext::DefineBuffers() {
if (!profile.supports_robust_buffer_access && !info.has_readconst) {
// In case ReadConstUbo has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::ReadConstUbo,
});
}
for (const auto& desc : info.buffers) { for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info); const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile); const bool is_storage = desc.IsStorage(buf_sharp, profile);

View file

@ -43,7 +43,7 @@ public:
Id Def(const IR::Value& value); Id Def(const IR::Value& value);
void DefineBufferOffsets(); void DefineBufferProperties();
void DefineInterpolatedAttribs(); void DefineInterpolatedAttribs();
void DefineWorkgroupIndex(); void DefineWorkgroupIndex();
@ -248,6 +248,9 @@ public:
BufferType buffer_type; BufferType buffer_type;
Id offset; Id offset;
Id offset_dwords; Id offset_dwords;
Id size;
Id size_shorts;
Id size_dwords;
std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases; std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
const BufferSpv& operator[](BufferAlias alias) const { const BufferSpv& operator[](BufferAlias alias) const {
@ -307,6 +310,8 @@ private:
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name); Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name); Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
Id GetBufferSize(u32 sharp_idx);
}; };
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View file

@ -9,6 +9,12 @@
namespace Shader::Gcn { namespace Shader::Gcn {
const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
const u32* code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
return code;
}
/** /**
* s_load_dwordx4 s[8:11], s[2:3], 0x00 * s_load_dwordx4 s[8:11], s[2:3], 0x00
* s_load_dwordx4 s[12:15], s[2:3], 0x04 * s_load_dwordx4 s[12:15], s[2:3], 0x04
@ -38,9 +44,8 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
if (!info.has_fetch_shader) { if (!info.has_fetch_shader) {
return std::nullopt; return std::nullopt;
} }
const u32* code;
std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
const auto* code = GetFetchShaderCode(info, info.fetch_shader_sgpr_base);
FetchShaderData data{.code = code}; FetchShaderData data{.code = code};
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max()); GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder; GcnDecodeContext decoder;

View file

@ -64,6 +64,8 @@ struct FetchShaderData {
} }
}; };
const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base);
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info); std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View file

@ -4,6 +4,7 @@
#include "common/config.h" #include "common/config.h"
#include "common/io_file.h" #include "common/io_file.h"
#include "common/path_util.h" #include "common/path_util.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
@ -470,8 +471,29 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
void Translator::EmitFetch(const GcnInst& inst) { void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly. // Read the pointer to the fetch shader assembly.
const auto code_sgpr_base = inst.src[0].code;
if (!profile.supports_robust_buffer_access) {
// The fetch shader must be inlined so that its accesses go through regular
// buffers, allowing bounds checks to be emitted to emulate robust buffer access.
const auto* code = GetFetchShaderCode(info, code_sgpr_base);
GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
// Decode and save instructions
u32 sub_pc = 0;
while (!slice.atEnd()) {
const auto sub_inst = decoder.decodeInstruction(slice);
if (sub_inst.opcode == Opcode::S_SETPC_B64) {
// Assume we're swapping back to the main shader.
break;
}
TranslateInstruction(sub_inst, sub_pc++);
}
return;
}
info.has_fetch_shader = true; info.has_fetch_shader = true;
info.fetch_shader_sgpr_base = inst.src[0].code; info.fetch_shader_sgpr_base = code_sgpr_base;
const auto fetch_data = ParseFetchShader(info); const auto fetch_data = ParseFetchShader(info);
ASSERT(fetch_data.has_value()); ASSERT(fetch_data.has_value());
@ -520,6 +542,40 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
info.translation_failed = true; info.translation_failed = true;
} }
void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
// Emit instructions for each category.
switch (inst.category) {
case InstCategory::DataShare:
EmitDataShare(inst);
break;
case InstCategory::VectorInterpolation:
EmitVectorInterpolation(inst);
break;
case InstCategory::ScalarMemory:
EmitScalarMemory(inst);
break;
case InstCategory::VectorMemory:
EmitVectorMemory(inst);
break;
case InstCategory::Export:
EmitExport(inst);
break;
case InstCategory::FlowControl:
EmitFlowControl(pc, inst);
break;
case InstCategory::ScalarALU:
EmitScalarAlu(inst);
break;
case InstCategory::VectorALU:
EmitVectorAlu(inst);
break;
case InstCategory::DebugProfile:
break;
default:
UNREACHABLE();
}
}
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info, void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile) { const RuntimeInfo& runtime_info, const Profile& profile) {
if (inst_list.empty()) { if (inst_list.empty()) {
@ -537,37 +593,7 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
continue; continue;
} }
// Emit instructions for each category. translator.TranslateInstruction(inst, pc);
switch (inst.category) {
case InstCategory::DataShare:
translator.EmitDataShare(inst);
break;
case InstCategory::VectorInterpolation:
translator.EmitVectorInterpolation(inst);
break;
case InstCategory::ScalarMemory:
translator.EmitScalarMemory(inst);
break;
case InstCategory::VectorMemory:
translator.EmitVectorMemory(inst);
break;
case InstCategory::Export:
translator.EmitExport(inst);
break;
case InstCategory::FlowControl:
translator.EmitFlowControl(pc, inst);
break;
case InstCategory::ScalarALU:
translator.EmitScalarAlu(inst);
break;
case InstCategory::VectorALU:
translator.EmitVectorAlu(inst);
break;
case InstCategory::DebugProfile:
break;
default:
UNREACHABLE();
}
} }
} }

View file

@ -58,6 +58,8 @@ public:
explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info, explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
const Profile& profile); const Profile& profile);
void TranslateInstruction(const GcnInst& inst, u32 pc);
// Instruction categories // Instruction categories
void EmitPrologue(); void EmitPrologue();
void EmitFetch(const GcnInst& inst); void EmitFetch(const GcnInst& inst);

View file

@ -195,6 +195,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc); buffer_info.system_coherent.Assign(mubuf.slc);
buffer_info.typed.Assign(is_typed);
if (is_typed) { if (is_typed) {
const auto& mtbuf = inst.control.mtbuf; const auto& mtbuf = inst.control.mtbuf;
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt); const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
@ -241,6 +242,7 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc); buffer_info.system_coherent.Assign(mubuf.slc);
buffer_info.typed.Assign(true);
const IR::Value handle = const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
@ -283,6 +285,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc); buffer_info.system_coherent.Assign(mubuf.slc);
buffer_info.typed.Assign(is_typed);
if (is_typed) { if (is_typed) {
const auto& mtbuf = inst.control.mtbuf; const auto& mtbuf = inst.control.mtbuf;
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt); const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
@ -339,6 +342,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc); buffer_info.system_coherent.Assign(mubuf.slc);
buffer_info.typed.Assign(true);
const IR::VectorReg src_reg{inst.src[1].code}; const IR::VectorReg src_reg{inst.src[1].code};

View file

@ -23,6 +23,10 @@
namespace Shader { namespace Shader {
static constexpr size_t NumUserDataRegs = 16; static constexpr size_t NumUserDataRegs = 16;
static constexpr size_t NumImages = 64;
static constexpr size_t NumBuffers = 32;
static constexpr size_t NumSamplers = 16;
static constexpr size_t NumFMasks = 8;
enum class TextureType : u32 { enum class TextureType : u32 {
Color1D, Color1D,
@ -63,7 +67,7 @@ struct BufferResource {
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
}; };
using BufferResourceList = boost::container::small_vector<BufferResource, 16>; using BufferResourceList = boost::container::small_vector<BufferResource, NumBuffers>;
struct ImageResource { struct ImageResource {
u32 sharp_idx; u32 sharp_idx;
@ -74,7 +78,7 @@ struct ImageResource {
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
}; };
using ImageResourceList = boost::container::small_vector<ImageResource, 16>; using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>;
struct SamplerResource { struct SamplerResource {
u32 sharp_idx; u32 sharp_idx;
@ -84,31 +88,33 @@ struct SamplerResource {
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
}; };
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>; using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>;
struct FMaskResource { struct FMaskResource {
u32 sharp_idx; u32 sharp_idx;
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
}; };
using FMaskResourceList = boost::container::small_vector<FMaskResource, 16>; using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
struct PushData { struct PushData {
static constexpr u32 BufOffsetIndex = 2; static constexpr u32 Step0Index = 0;
static constexpr u32 UdRegsIndex = 4; static constexpr u32 Step1Index = 1;
static constexpr u32 XOffsetIndex = 8; static constexpr u32 XOffsetIndex = 2;
static constexpr u32 YOffsetIndex = 9; static constexpr u32 YOffsetIndex = 3;
static constexpr u32 XScaleIndex = 10; static constexpr u32 XScaleIndex = 4;
static constexpr u32 YScaleIndex = 11; static constexpr u32 YScaleIndex = 5;
static constexpr u32 UdRegsIndex = 6;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
u32 step0; u32 step0;
u32 step1; u32 step1;
std::array<u8, 32> buf_offsets;
std::array<u32, NumUserDataRegs> ud_regs;
float xoffset; float xoffset;
float yoffset; float yoffset;
float xscale; float xscale;
float yscale; float yscale;
std::array<u32, NumUserDataRegs> ud_regs;
std::array<u8, NumBuffers> buf_offsets;
void AddOffset(u32 binding, u32 offset) { void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size()); ASSERT(offset < 256 && binding < buf_offsets.size());

View file

@ -51,6 +51,7 @@ union BufferInstInfo {
BitField<2, 12, u32> inst_offset; BitField<2, 12, u32> inst_offset;
BitField<14, 1, u32> system_coherent; BitField<14, 1, u32> system_coherent;
BitField<15, 1, u32> globally_coherent; BitField<15, 1, u32> globally_coherent;
BitField<16, 1, u32> typed;
}; };
enum class ScalarReg : u32 { enum class ScalarReg : u32 {

View file

@ -25,6 +25,7 @@ struct Profile {
bool support_legacy_vertex_attributes{}; bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{}; bool supports_image_load_store_lod{};
bool supports_native_cube_calc{}; bool supports_native_cube_calc{};
bool supports_robust_buffer_access{};
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{}; bool lower_left_origin_mode{};
bool needs_manual_interpolation{}; bool needs_manual_interpolation{};

View file

@ -608,7 +608,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
return false; return false;
} }
Image& image = texture_cache.GetImage(image_id); Image& image = texture_cache.GetImage(image_id);
if (False(image.flags & ImageFlagBits::GpuModified)) { // Only perform sync if image is:
// - GPU modified; otherwise there are no changes to synchronize.
// - Not CPU modified; otherwise we could overwrite CPU changes with stale GPU changes.
if (False(image.flags & ImageFlagBits::GpuModified) ||
True(image.flags & ImageFlagBits::CpuDirty)) {
return false; return false;
} }
ASSERT_MSG(device_addr == image.info.guest_address, ASSERT_MSG(device_addr == image.info.guest_address,

View file

@ -210,9 +210,6 @@ bool Instance::CreateDevice() {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
features = feature_chain.get().features; features = feature_chain.get().features;
#ifdef __APPLE__
portability_features = feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
#endif
const vk::StructureChain properties_chain = physical_device.getProperties2< const vk::StructureChain properties_chain = physical_device.getProperties2<
vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties, vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties,
@ -258,16 +255,19 @@ bool Instance::CreateDevice() {
add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
dynamic_color_write_mask = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
if (dynamic_color_write_mask) { if (dynamic_state_3) {
dynamic_color_write_mask = dynamic_state_3_features =
feature_chain.get<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>() feature_chain.get<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
.extendedDynamicState3ColorWriteMask; LOG_INFO(Render_Vulkan, "- extendedDynamicState3ColorWriteMask: {}",
dynamic_state_3_features.extendedDynamicState3ColorWriteMask);
} }
null_descriptor = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); robustness2 = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
if (null_descriptor) { if (robustness2) {
null_descriptor = robustness2_features = feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>();
feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor; LOG_INFO(Render_Vulkan, "- robustBufferAccess2: {}",
robustness2_features.robustBufferAccess2);
LOG_INFO(Render_Vulkan, "- nullDescriptor: {}", robustness2_features.nullDescriptor);
} }
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
@ -284,6 +284,9 @@ bool Instance::CreateDevice() {
#ifdef __APPLE__ #ifdef __APPLE__
// Required by Vulkan spec if supported. // Required by Vulkan spec if supported.
portability_subset = add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); portability_subset = add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
if (portability_subset) {
portability_features = feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
}
#endif #endif
const auto family_properties = physical_device.getQueueFamilyProperties(); const auto family_properties = physical_device.getQueueFamilyProperties();
@ -387,13 +390,15 @@ bool Instance::CreateDevice() {
.customBorderColorWithoutFormat = true, .customBorderColorWithoutFormat = true,
}, },
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{
.extendedDynamicState3ColorWriteMask = true, .extendedDynamicState3ColorWriteMask =
dynamic_state_3_features.extendedDynamicState3ColorWriteMask,
}, },
vk::PhysicalDeviceDepthClipControlFeaturesEXT{ vk::PhysicalDeviceDepthClipControlFeaturesEXT{
.depthClipControl = true, .depthClipControl = true,
}, },
vk::PhysicalDeviceRobustness2FeaturesEXT{ vk::PhysicalDeviceRobustness2FeaturesEXT{
.nullDescriptor = true, .robustBufferAccess2 = robustness2_features.robustBufferAccess2,
.nullDescriptor = robustness2_features.nullDescriptor,
}, },
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{ vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
.vertexInputDynamicState = true, .vertexInputDynamicState = true,
@ -420,13 +425,13 @@ bool Instance::CreateDevice() {
if (!custom_border_color) { if (!custom_border_color) {
device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
} }
if (!dynamic_color_write_mask) { if (!dynamic_state_3) {
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
} }
if (!depth_clip_control) { if (!depth_clip_control) {
device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
} }
if (!null_descriptor) { if (!robustness2) {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
} }
if (!vertex_input_dynamic_state) { if (!vertex_input_dynamic_state) {

View file

@ -99,9 +99,10 @@ public:
return depth_clip_control; return depth_clip_control;
} }
/// Returns true when dynamic color write mask state is supported /// Returns true when the extendedDynamicState3ColorWriteMask feature of
/// VK_EXT_extended_dynamic_state3 is supported.
bool IsDynamicColorWriteMaskSupported() const { bool IsDynamicColorWriteMaskSupported() const {
return dynamic_color_write_mask; return dynamic_state_3 && dynamic_state_3_features.extendedDynamicState3ColorWriteMask;
} }
/// Returns true when VK_EXT_vertex_input_dynamic_state is supported. /// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
@ -109,9 +110,14 @@ public:
return vertex_input_dynamic_state; return vertex_input_dynamic_state;
} }
/// Returns true when the robustBufferAccess2 feature of VK_EXT_robustness2 is supported.
bool IsRobustBufferAccess2Supported() const {
return robustness2 && robustness2_features.robustBufferAccess2;
}
/// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported. /// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported.
bool IsNullDescriptorSupported() const { bool IsNullDescriptorSupported() const {
return null_descriptor; return robustness2 && robustness2_features.nullDescriptor;
} }
/// Returns true when VK_KHR_fragment_shader_barycentric is supported. /// Returns true when VK_KHR_fragment_shader_barycentric is supported.
@ -303,6 +309,8 @@ private:
vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props; vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
vk::PhysicalDeviceFeatures features; vk::PhysicalDeviceFeatures features;
vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features; vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
vk::DriverIdKHR driver_id; vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{}; vk::UniqueDebugUtilsMessengerEXT debug_callback{};
std::string vendor_name; std::string vendor_name;
@ -317,9 +325,9 @@ private:
bool custom_border_color{}; bool custom_border_color{};
bool fragment_shader_barycentric{}; bool fragment_shader_barycentric{};
bool depth_clip_control{}; bool depth_clip_control{};
bool dynamic_color_write_mask{}; bool dynamic_state_3{};
bool vertex_input_dynamic_state{}; bool vertex_input_dynamic_state{};
bool null_descriptor{}; bool robustness2{};
bool list_restart{}; bool list_restart{};
bool legacy_vertex_attributes{}; bool legacy_vertex_attributes{};
bool shader_stencil_export{}; bool shader_stencil_export{};

View file

@ -200,6 +200,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||

View file

@ -447,7 +447,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
set_writes.clear(); set_writes.clear();
buffer_barriers.clear(); buffer_barriers.clear();
buffer_infos.clear(); buffer_infos.clear();
buffer_views.clear();
image_infos.clear(); image_infos.clear();
// Bind resource buffers and textures. // Bind resource buffers and textures.

View file

@ -110,18 +110,17 @@ private:
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8> std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
cb_descs; cb_descs;
std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc; std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc;
boost::container::static_vector<vk::DescriptorImageInfo, 64> image_infos; boost::container::static_vector<vk::DescriptorImageInfo, Shader::NumImages> image_infos;
boost::container::static_vector<vk::BufferView, 16> buffer_views; boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NumBuffers> buffer_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos; boost::container::static_vector<VideoCore::ImageId, Shader::NumImages> bound_images;
boost::container::static_vector<VideoCore::ImageId, 64> bound_images;
Pipeline::DescriptorWrites set_writes; Pipeline::DescriptorWrites set_writes;
Pipeline::BufferBarriers buffer_barriers; Pipeline::BufferBarriers buffer_barriers;
using BufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>; using BufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
boost::container::static_vector<BufferBindingInfo, 32> buffer_bindings; boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>; using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
boost::container::static_vector<ImageBindingInfo, 64> image_bindings; boost::container::static_vector<ImageBindingInfo, Shader::NumImages> image_bindings;
}; };
} // namespace Vulkan } // namespace Vulkan