shader_recompiler: Reorganize data share operations and implement GDS bit

IndecisiveTurtle 2025-07-09 23:37:16 +03:00
parent dc6ef99dc7
commit 27b243cae6
11 changed files with 422 additions and 248 deletions
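
This change collapses the per-opcode DS_* translator handlers into a single DS_OP template, adds 64-bit variants for every shared-memory atomic IR opcode, and threads each DS instruction's GDS bit through the IR as an instruction flag so GDS accesses can later be rewritten into buffer operations. A minimal sketch of the flag's round trip, using names as they appear in the hunks below:

    // Translator: the GDS bit comes straight from the DS instruction encoding.
    const bool is_gds = inst.control.ds.gds;
    ir.SharedAtomicIAdd(addr_offset, data, is_gds); // recorded via Inst(..., Flags{is_gds}, ...)

    // Resource tracking: the flag is read back to decide whether the shared
    // op targets GDS and must be patched into a GDS buffer access.
    const bool gds = inst.Flags<bool>(); // see IsDataRingInstruction()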


@@ -54,17 +54,23 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
});
}
Id SharedAtomicU64IncDec(EmitContext& ctx, Id offset,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(3U)};
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics);
});
}
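The 64-bit helpers address shared memory in qword units, so both the index and the element count are scaled by 8. A worked example with illustrative values:

    // With runtime_info.cs_info.shared_memory_size = 8192 bytes:
    //   num_elements = Common::DivCeil(8192, 8) = 1024 qwords
    //   byte offset 0x18 -> index = 0x18 >> 3 = 3, which passes the bounds check (3 < 1024)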
template <bool is_float = false>
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
const auto type = [&] {
if constexpr (is_float) {
return ctx.F32[1];
} else {
return ctx.U32[1];
}
}();
const Id type = is_float ? ctx.F32[1] : ctx.U32[1];
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
@@ -148,42 +154,82 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
}
Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicOr);
}
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
}
Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicXor);
}
Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
}
Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicISub);
}
Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) {
return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
}
Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset) {
return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
}
Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) {
return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
}
Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset) {
return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
}
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
}


@@ -139,15 +139,25 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset);
Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset);
Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset);
Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset);
Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);


@@ -76,6 +76,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
} else {
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
}
String(fmt::format("{:#x}", info.pgm_hash));
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();


@@ -3,7 +3,6 @@
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn {
@@ -12,29 +11,29 @@ void Translator::EmitDataShare(const GcnInst& inst) {
switch (inst.opcode) {
// DS
case Opcode::DS_ADD_U32:
return DS_ADD_U32(inst, false);
return DS_OP(inst, AtomicOp::Add, false);
case Opcode::DS_ADD_U64:
return DS_ADD_U64(inst, false);
return DS_OP<IR::U64>(inst, AtomicOp::Add, false);
case Opcode::DS_SUB_U32:
return DS_SUB_U32(inst, false);
return DS_OP(inst, AtomicOp::Sub, false);
case Opcode::DS_INC_U32:
return DS_INC_U32(inst, false);
return DS_OP(inst, AtomicOp::Inc, false);
case Opcode::DS_DEC_U32:
return DS_DEC_U32(inst, false);
return DS_OP(inst, AtomicOp::Dec, false);
case Opcode::DS_MIN_I32:
return DS_MIN_U32(inst, true, false);
return DS_OP(inst, AtomicOp::Smin, false);
case Opcode::DS_MAX_I32:
return DS_MAX_U32(inst, true, false);
return DS_OP(inst, AtomicOp::Smax, false);
case Opcode::DS_MIN_U32:
return DS_MIN_U32(inst, false, false);
return DS_OP(inst, AtomicOp::Umin, false);
case Opcode::DS_MAX_U32:
return DS_MAX_U32(inst, false, false);
return DS_OP(inst, AtomicOp::Umax, false);
case Opcode::DS_AND_B32:
return DS_AND_B32(inst, false);
return DS_OP(inst, AtomicOp::And, false);
case Opcode::DS_OR_B32:
return DS_OR_B32(inst, false);
return DS_OP(inst, AtomicOp::Or, false);
case Opcode::DS_XOR_B32:
return DS_XOR_B32(inst, false);
return DS_OP(inst, AtomicOp::Xor, false);
case Opcode::DS_WRITE_B32:
return DS_WRITE(32, false, false, false, inst);
case Opcode::DS_WRITE2_B32:
@@ -42,19 +41,19 @@ void Translator::EmitDataShare(const GcnInst& inst) {
case Opcode::DS_WRITE2ST64_B32:
return DS_WRITE(32, false, true, true, inst);
case Opcode::DS_ADD_RTN_U32:
return DS_ADD_U32(inst, true);
return DS_OP(inst, AtomicOp::Add, true);
case Opcode::DS_SUB_RTN_U32:
return DS_SUB_U32(inst, true);
return DS_OP(inst, AtomicOp::Sub, true);
case Opcode::DS_MIN_RTN_U32:
return DS_MIN_U32(inst, false, true);
return DS_OP(inst, AtomicOp::Umin, true);
case Opcode::DS_MAX_RTN_U32:
return DS_MAX_U32(inst, false, true);
return DS_OP(inst, AtomicOp::Umax, true);
case Opcode::DS_AND_RTN_B32:
return DS_AND_B32(inst, true);
return DS_OP(inst, AtomicOp::And, true);
case Opcode::DS_OR_RTN_B32:
return DS_OR_B32(inst, true);
return DS_OP(inst, AtomicOp::Or, true);
case Opcode::DS_XOR_RTN_B32:
return DS_XOR_B32(inst, true);
return DS_OP(inst, AtomicOp::Xor, true);
case Opcode::DS_SWIZZLE_B32:
return DS_SWIZZLE_B32(inst);
case Opcode::DS_READ_B32:
@@ -117,92 +116,63 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
// DS
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
template <typename T>
void Translator::DS_OP(const GcnInst& inst, AtomicOp op, bool rtn) {
const bool is_gds = inst.control.ds.gds;
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const T data = [&] {
if (op == AtomicOp::Inc || op == AtomicOp::Dec) {
return T{};
}
if constexpr (std::is_same_v<T, IR::U32>) {
return GetSrc(inst.src[1]);
} else {
return GetSrc64(inst.src[1]);
}
}();
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
const T original_val = [&] -> T {
switch (op) {
case AtomicOp::Add:
return ir.SharedAtomicIAdd(addr_offset, data, is_gds);
case AtomicOp::Umin:
return ir.SharedAtomicIMin(addr_offset, data, false, is_gds);
case AtomicOp::Smin:
return ir.SharedAtomicIMin(addr_offset, data, true, is_gds);
case AtomicOp::Umax:
return ir.SharedAtomicIMax(addr_offset, data, false, is_gds);
case AtomicOp::Smax:
return ir.SharedAtomicIMax(addr_offset, data, true, is_gds);
case AtomicOp::And:
return ir.SharedAtomicAnd(addr_offset, data, is_gds);
case AtomicOp::Or:
return ir.SharedAtomicOr(addr_offset, data, is_gds);
case AtomicOp::Xor:
return ir.SharedAtomicXor(addr_offset, data, is_gds);
case AtomicOp::Sub:
return ir.SharedAtomicISub(addr_offset, data, is_gds);
case AtomicOp::Inc:
return ir.SharedAtomicInc<T>(addr_offset, is_gds);
case AtomicOp::Dec:
return ir.SharedAtomicDec<T>(addr_offset, is_gds);
default:
UNREACHABLE();
}
}();
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U64 data{GetSrc64(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
if (rtn) {
SetDst64(inst.dst[0], IR::U64{original_val});
}
}
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
if constexpr (std::is_same_v<T, IR::U32>) {
SetDst(inst.dst[0], original_val);
} else {
SetDst64(inst.dst[0], original_val);
}
}
}
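Two representative translations through the new template, taken from the dispatch switch above:

    // ds_min_rtn_u32: 32-bit unsigned min whose prior value is written back.
    DS_OP(inst, AtomicOp::Umin, true); // -> ir.SharedAtomicIMin(addr_offset, data, false, is_gds)

    // ds_add_u64: 64-bit add with no return value requested.
    DS_OP<IR::U64>(inst, AtomicOp::Add, false); // -> ir.SharedAtomicIAdd(addr_offset, data, is_gds)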
void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) {
const bool is_gds = inst.control.ds.gds;
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg data0{inst.src[1].code};
const IR::VectorReg data1{inst.src[2].code};
@@ -220,85 +190,40 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
ir.WriteShared(64,
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
ir.GetVectorReg(data0 + 1))),
addr0);
addr0, is_gds);
} else if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
ir.WriteShared(32, ir.GetVectorReg(data0), addr0, is_gds);
} else if (bit_size == 16) {
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
}
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
if (bit_size == 64) {
ir.WriteShared(64,
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
ir.GetVectorReg(data1 + 1))),
addr1);
addr1, is_gds);
} else if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
ir.WriteShared(32, ir.GetVectorReg(data1), addr1, is_gds);
} else if (bit_size == 16) {
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1);
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1, is_gds);
}
} else {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
if (bit_size == 64) {
const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0, is_gds);
} else if (bit_size == 32) {
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0, is_gds);
} else if (bit_size == 16) {
ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
}
}
}
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
const u8 offset0 = inst.control.ds.offset0;
const u8 offset1 = inst.control.ds.offset1;
const IR::U32 src{GetSrc(inst.src[0])};
// ASSERT(offset1 & 0x80);
const IR::U32 lane_id = ir.LaneId();
const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
const IR::U32 index = ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2));
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
}
void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicInc(addr_offset);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicDec(addr_offset);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) {
const bool is_gds = inst.control.ds.gds;
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
IR::VectorReg dst_reg{inst.dst[0].code};
const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
@@ -312,7 +237,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
// Pair loads are either 32 or 64-bit
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
if (bit_size == 64) {
const auto vector = ir.UnpackUint2x32(IR::U64{data0});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
@@ -323,7 +248,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})});
}
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1, is_gds);
if (bit_size == 64) {
const auto vector = ir.UnpackUint2x32(IR::U64{data1});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
@@ -335,7 +260,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
}
} else {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
if (bit_size == 64) {
const auto vector = ir.UnpackUint2x32(IR::U64{data});
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
@@ -348,6 +273,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
}
}
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
const u8 offset0 = inst.control.ds.offset0;
const u8 offset1 = inst.control.ds.offset1;
const IR::U32 src{GetSrc(inst.src[0])};
ASSERT(offset1 & 0x80);
const IR::U32 lane_id = ir.LaneId();
const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
const IR::U32 index = ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2));
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
}
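This implements the quad-permute swizzle mode: offset0 packs four 2-bit lane selectors, and each lane of a quad extracts its selector from bit position (lane_id & 3) * 2. A worked example with an illustrative control word:

    // offset0 = 0b00'01'10'11: lanes 0..3 of each quad read source lanes {3, 2, 1, 0},
    // i.e. the quad is reversed. Lane 2 computes base = (2 & 3) * 2 = 4 and
    // BitFieldExtract(0b00011011, 4, 2) = 0b01, so it reads lane 1 of its quad.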
void Translator::DS_APPEND(const GcnInst& inst) {
const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0;
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));


@@ -270,21 +270,13 @@ public:
// Data share
// DS
void DS_ADD_U32(const GcnInst& inst, bool rtn);
void DS_ADD_U64(const GcnInst& inst, bool rtn);
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
template <typename T = IR::U32>
void DS_OP(const GcnInst& inst, AtomicOp op, bool rtn);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_AND_B32(const GcnInst& inst, bool rtn);
void DS_OR_B32(const GcnInst& inst, bool rtn);
void DS_XOR_B32(const GcnInst& inst, bool rtn);
void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_APPEND(const GcnInst& inst);
void DS_CONSUME(const GcnInst& inst);
void DS_SUB_U32(const GcnInst& inst, bool rtn);
void DS_INC_U32(const GcnInst& inst, bool rtn);
void DS_DEC_U32(const GcnInst& inst, bool rtn);
// Buffer Memory
// MUBUF / MTBUF


@@ -565,7 +565,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
}
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
if ((inst.src[0].field == OperandField::ExecHi ||
inst.src[0].field == OperandField::VccHi) &&
inst.src[0].field == OperandField::VccHi ||
inst.src[0].field == OperandField::ScalarGPR) &&
(inst.src[1].field == OperandField::ConstZero ||
inst.src[1].field == OperandField::VectorGPR)) {
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
@@ -579,7 +580,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
}
// v_mbcnt_lo_u32_b32 vY, exec_lo, vX
// Used in combination with the above for append buffer indexing.
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo ||
inst.src[0].field == OperandField::ScalarGPR) {
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
}
UNREACHABLE();
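Both mbcnt patterns now also accept a ScalarGPR source, presumably for shaders that stage the EXEC mask through scalar registers first; the recognized sequence is unchanged:

    // v_mbcnt_hi_u32_b32 vX, exec_hi (or sN), 0
    // v_mbcnt_lo_u32_b32 vY, exec_lo (or sM), vX
    // Together these yield the lane's position among active lanes, which the
    // comment above notes is used for append buffer indexing.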


@@ -291,78 +291,137 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
Inst(Opcode::SetPatch, patch, value);
}
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset, bool is_gds) {
switch (bit_size) {
case 16:
return Inst<U16>(Opcode::LoadSharedU16, offset);
return Inst<U16>(Opcode::LoadSharedU16, Flags{is_gds}, offset);
case 32:
return Inst<U32>(Opcode::LoadSharedU32, offset);
return Inst<U32>(Opcode::LoadSharedU32, Flags{is_gds}, offset);
case 64:
return Inst<U64>(Opcode::LoadSharedU64, offset);
return Inst<U64>(Opcode::LoadSharedU64, Flags{is_gds}, offset);
default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
}
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds) {
switch (bit_size) {
case 16:
Inst(Opcode::WriteSharedU16, offset, value);
Inst(Opcode::WriteSharedU16, Flags{is_gds}, offset, value);
break;
case 32:
Inst(Opcode::WriteSharedU32, offset, value);
Inst(Opcode::WriteSharedU32, Flags{is_gds}, offset, value);
break;
case 64:
Inst(Opcode::WriteSharedU64, offset, value);
Inst(Opcode::WriteSharedU64, Flags{is_gds}, offset, value);
break;
default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
}
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
return Inst<U32>(Opcode::SharedAtomicIAdd32, Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
return Inst<U64>(Opcode::SharedAtomicIAdd64, Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
U32U64 IREmitter::SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMin32 : Opcode::SharedAtomicUMin32,
Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMin64 : Opcode::SharedAtomicUMin64,
Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
U32U64 IREmitter::SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMax32 : Opcode::SharedAtomicUMax32,
Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMax64 : Opcode::SharedAtomicUMax64,
Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicAnd32, address, data);
U32U64 IREmitter::SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
U32U64 IREmitter::SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicOr32, Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicOr64, Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
U32U64 IREmitter::SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicXor32, Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicXor64, Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicInc(const U32& address) {
return Inst<U32>(Opcode::SharedAtomicInc32, address);
U32U64 IREmitter::SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicISub32, Flags{is_gds}, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicISub64, Flags{is_gds}, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicDec(const U32& address) {
return Inst<U32>(Opcode::SharedAtomicDec32, address);
template <>
U32 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
return Inst<U32>(Opcode::SharedAtomicInc32, Flags{is_gds}, address);
}
U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicISub32, address, data);
template <>
U64 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
return Inst<U64>(Opcode::SharedAtomicInc64, Flags{is_gds}, address);
}
template <>
U32 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
return Inst<U32>(Opcode::SharedAtomicDec32, Flags{is_gds}, address);
}
template <>
U64 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
return Inst<U64>(Opcode::SharedAtomicDec64, Flags{is_gds}, address);
}
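Inc and Dec carry no data operand from which to infer a width, so the result type is selected by an explicit template argument instead; a usage sketch matching the translator call site (addr and is_gds are illustrative locals):

    const IR::U32 prev32 = ir.SharedAtomicInc(addr, is_gds);          // SharedAtomicInc32
    const IR::U64 prev64 = ir.SharedAtomicInc<IR::U64>(addr, is_gds); // SharedAtomicInc64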
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {


@@ -96,18 +96,24 @@ public:
[[nodiscard]] F32 GetPatch(Patch patch);
void SetPatch(Patch patch, const F32& value);
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset,
bool is_gds = false);
void WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds = false);
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
[[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicInc(const U32& address);
[[nodiscard]] U32 SharedAtomicDec(const U32& address);
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds);
[[nodiscard]] U32U64 SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds);
[[nodiscard]] U32U64 SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
bool is_gds);
[[nodiscard]] U32U64 SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
bool is_gds);
[[nodiscard]] U32U64 SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds);
[[nodiscard]] U32U64 SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds);
[[nodiscard]] U32U64 SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds);
template <typename T = U32>
[[nodiscard]] T SharedAtomicInc(const U32& address, bool is_gds);
template <typename T = U32>
[[nodiscard]] T SharedAtomicDec(const U32& address, bool is_gds);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);


@@ -41,15 +41,25 @@ OPCODE(WriteSharedU64, Void, U32,
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
OPCODE(SharedAtomicISub32, U32, U32, U32, )
OPCODE(SharedAtomicISub64, U64, U32, U64, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMin64, U64, U32, U64, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin64, U64, U32, U64, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
OPCODE(SharedAtomicSMax64, U64, U32, U64, )
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
OPCODE(SharedAtomicUMax64, U64, U32, U64, )
OPCODE(SharedAtomicInc32, U32, U32, )
OPCODE(SharedAtomicInc64, U64, U32, )
OPCODE(SharedAtomicDec32, U32, U32, )
OPCODE(SharedAtomicDec64, U64, U32, )
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
OPCODE(SharedAtomicAnd64, U64, U32, U64, )
OPCODE(SharedAtomicOr32, U32, U32, U32, )
OPCODE(SharedAtomicOr64, U64, U32, U64, )
OPCODE(SharedAtomicXor32, U32, U32, U32, )
OPCODE(SharedAtomicXor64, U64, U32, U64, )
// Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, )


@@ -84,8 +84,42 @@ bool IsBufferInstruction(const IR::Inst& inst) {
}
bool IsDataRingInstruction(const IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::DataAppend ||
inst.GetOpcode() == IR::Opcode::DataConsume;
switch (inst.GetOpcode()) {
case IR::Opcode::DataAppend:
case IR::Opcode::DataConsume:
return true;
case IR::Opcode::LoadSharedU16:
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU16:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64:
case IR::Opcode::SharedAtomicUMin32:
case IR::Opcode::SharedAtomicUMin64:
case IR::Opcode::SharedAtomicSMin32:
case IR::Opcode::SharedAtomicSMin64:
case IR::Opcode::SharedAtomicUMax32:
case IR::Opcode::SharedAtomicUMax64:
case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicSMax64:
case IR::Opcode::SharedAtomicAnd32:
case IR::Opcode::SharedAtomicAnd64:
case IR::Opcode::SharedAtomicOr32:
case IR::Opcode::SharedAtomicOr64:
case IR::Opcode::SharedAtomicXor32:
case IR::Opcode::SharedAtomicXor64:
case IR::Opcode::SharedAtomicISub32:
case IR::Opcode::SharedAtomicISub64:
case IR::Opcode::SharedAtomicInc32:
case IR::Opcode::SharedAtomicInc64:
case IR::Opcode::SharedAtomicDec32:
case IR::Opcode::SharedAtomicDec64:
return inst.Flags<bool>(); // is_gds
default:
return false;
}
}
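Shared-memory opcodes therefore count as data-ring instructions only when their is_gds flag is set; plain LDS traffic is untouched. In the resource tracking pass this drives the dispatch added at the end of this file's hunks:

    if (IsDataRingInstruction(inst)) {
        // DataAppend/DataConsume, or any shared op whose is_gds flag is true.
        PatchGlobalDataShareAccess(*block, inst, info, descriptors);
    }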
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
@@ -507,7 +541,8 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
}
}
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
const u32 binding = descriptors.Add(BufferResource{
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
@@ -515,37 +550,111 @@
.is_written = true,
});
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
// Attempt to deduce the GDS address of counter at compile time.
u32 gds_addr = 0;
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
gds_addr = gds_offset.U32() & 0xFFFF;
} else {
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to track M0 source");
// M0 must be set by some user data register.
const IR::Inst* prod = gds_offset.InstRecursive();
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
u32 m0_val = info.user_data[ud_reg] >> 16;
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
gds_addr = m0_val & 0xFFFF;
}
// Patch instruction.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
inst.SetArg(1, ir.Imm32(binding));
// For data append/consume operations, attempt to deduce the GDS address of the counter.
if (inst.GetOpcode() == IR::Opcode::DataAppend || inst.GetOpcode() == IR::Opcode::DataConsume) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
u32 gds_addr = 0;
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
gds_addr = gds_offset.U32() & 0xFFFF;
} else {
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to track M0 source");
// M0 must be set by some user data register.
const IR::Inst* prod = gds_offset.InstRecursive();
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
u32 m0_val = info.user_data[ud_reg] >> 16;
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
gds_addr = m0_val & 0xFFFF;
}
// Patch instruction.
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
inst.SetArg(1, ir.Imm32(binding));
} else {
// Convert the shared memory opcode into an equivalent storage buffer access to the GDS buffer.
const IR::U32 offset = IR::U32{inst.Arg(0)};
const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
const IR::U32 address_qwords = ir.ShiftRightLogical(offset, ir.Imm32(3));
const IR::U32 handle = ir.Imm32(binding);
switch (inst.GetOpcode()) {
case IR::Opcode::SharedAtomicIAdd32:
inst.ReplaceUsesWith(ir.BufferAtomicIAdd(handle, address_dwords, inst.Arg(1), {}));
break;
case IR::Opcode::SharedAtomicIAdd64:
inst.ReplaceUsesWith(
ir.BufferAtomicIAdd(handle, address_qwords, IR::U64{inst.Arg(1)}, {}));
break;
case IR::Opcode::SharedAtomicISub32:
inst.ReplaceUsesWith(ir.BufferAtomicISub(handle, address_dwords, inst.Arg(1), {}));
break;
case IR::Opcode::SharedAtomicSMin32:
case IR::Opcode::SharedAtomicUMin32: {
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
inst.ReplaceUsesWith(
ir.BufferAtomicIMin(handle, address_dwords, inst.Arg(1), is_signed, {}));
break;
}
case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32: {
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
inst.ReplaceUsesWith(
ir.BufferAtomicIMax(handle, address_dwords, inst.Arg(1), is_signed, {}));
break;
}
case IR::Opcode::SharedAtomicInc32:
inst.ReplaceUsesWith(ir.BufferAtomicInc(handle, address_dwords, {}));
break;
case IR::Opcode::SharedAtomicDec32:
inst.ReplaceUsesWith(ir.BufferAtomicDec(handle, address_dwords, {}));
break;
case IR::Opcode::SharedAtomicAnd32:
inst.ReplaceUsesWith(ir.BufferAtomicAnd(handle, address_dwords, inst.Arg(1), {}));
break;
case IR::Opcode::SharedAtomicOr32:
inst.ReplaceUsesWith(ir.BufferAtomicOr(handle, address_dwords, inst.Arg(1), {}));
break;
case IR::Opcode::SharedAtomicXor32:
inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
break;
case IR::Opcode::LoadSharedU16:
inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
break;
case IR::Opcode::LoadSharedU32:
inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
break;
case IR::Opcode::LoadSharedU64:
inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
break;
case IR::Opcode::WriteSharedU16:
ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
inst.Invalidate();
break;
case IR::Opcode::WriteSharedU32:
ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
inst.Invalidate();
break;
case IR::Opcode::WriteSharedU64:
ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
inst.Invalidate();
break;
default:
UNREACHABLE();
}
}
}
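The byte offset carried by the shared op is rescaled to the element size of the replacement buffer access; a worked example with an illustrative offset:

    // Byte offset 0x40:
    //   16-bit access -> address_words  = 0x40 >> 1 = 0x20
    //   32-bit access -> address_dwords = 0x40 >> 2 = 0x10
    //   64-bit access -> address_qwords = 0x40 >> 3 = 0x08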
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
@@ -916,8 +1025,6 @@ void ResourceTrackingPass(IR::Program& program) {
PatchBufferSharp(*block, inst, info, descriptors);
} else if (IsImageInstruction(inst)) {
PatchImageSharp(*block, inst, info, descriptors);
} else if (IsDataRingInstruction(inst)) {
PatchDataRingAccess(*block, inst, info, descriptors);
}
}
}
@@ -929,6 +1036,8 @@ void ResourceTrackingPass(IR::Program& program) {
PatchBufferArgs(*block, inst, info);
} else if (IsImageInstruction(inst)) {
PatchImageArgs(*block, inst, info);
} else if (IsDataRingInstruction(inst)) {
PatchGlobalDataShareAccess(*block, inst, info, descriptors);
}
}
}


@@ -48,6 +48,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
memory_tracker = std::make_unique<MemoryTracker>(tracker);
std::memset(gds_buffer.mapped_data.data(), 0, DataShareBufferSize);
// Ensure the first slot is used for the null buffer
const auto null_id =
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);