mirror of https://github.com/shadps4-emu/shadPS4.git

shader_recompiler: Reorganize data share operations and implement GDS bit

parent dc6ef99dc7
commit 27b243cae6

11 changed files with 422 additions and 248 deletions
@@ -54,17 +54,23 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
     });
 }
 
+Id SharedAtomicU64IncDec(EmitContext& ctx, Id offset,
+                         Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
+    const Id shift_id{ctx.ConstU32(3U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+    const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics);
+    });
+}
+
 template <bool is_float = false>
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    const auto type = [&] {
-        if constexpr (is_float) {
-            return ctx.F32[1];
-        } else {
-            return ctx.U32[1];
-        }
-    }();
+    const Id type = is_float ? ctx.F32[1] : ctx.U32[1];
     if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
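The helper added above routes both DS increment and decrement through a single code path: the SPIR-V instruction to emit is passed as a pointer to a Sirit::Module member function, and the byte offset is converted to a 64-bit element index (offset >> 3) that is bounds-checked against DivCeil(shared_memory_size, 8). A minimal standalone sketch of that index and bounds arithmetic, with hypothetical sizes (an illustration, not shadPS4 code):

#include <cassert>
#include <cstdint>

// Same rounding-up division as the Common::DivCeil call in the hunk above.
constexpr std::uint32_t DivCeil(std::uint32_t n, std::uint32_t d) {
    return (n + d - 1) / d;
}

int main() {
    const std::uint32_t shared_memory_size = 100; // bytes, hypothetical
    const std::uint32_t num_elements = DivCeil(shared_memory_size, 8u); // 13 u64 slots
    const std::uint32_t byte_offset = 96;
    const std::uint32_t index = byte_offset >> 3; // element index, as shift_id = 3 does
    assert(num_elements == 13 && index == 12 && index < num_elements);
}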
@@ -148,42 +154,82 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
 }
 
+Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
 Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
 }
 
+Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
 Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
 }
 
+Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
 Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
 }
 
+Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
 Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
 }
 
+Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
+}
+
 Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
 }
 
+Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicOr);
+}
+
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
 }
 
+Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicXor);
+}
+
 Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
 }
 
+Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicISub);
+}
+
 Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) {
     return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
 }
 
+Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset) {
+    return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
+}
+
 Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) {
     return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
 }
 
+Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset) {
+    return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
+}
+
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }
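Every wrapper above differs only in which Sirit member function it forwards, which is why the shared helpers take the opcode as a pointer to member function. A toy model of that dispatch pattern (toy Module type, not the real Sirit API):

#include <cassert>

struct Module {
    int OpAtomicUMax(int a, int b) { return a > b ? a : b; }
    int OpAtomicUMin(int a, int b) { return a < b ? a : b; }
};

// Dispatch through a pointer-to-member, as SharedAtomicU32/U64 do above.
int Atomic(Module& m, int a, int b, int (Module::*atomic_func)(int, int)) {
    return (m.*atomic_func)(a, b);
}

int main() {
    Module m;
    assert(Atomic(m, 2, 5, &Module::OpAtomicUMax) == 5);
    assert(Atomic(m, 2, 5, &Module::OpAtomicUMin) == 2);
}

This keeps each Emit* entry point a one-liner while the bounds check and atomic scope/semantics live in one place.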
@@ -139,15 +139,25 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset);
 Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset);
 Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value);
 
 Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
@@ -76,6 +76,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     } else {
         SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
     }
+    String(fmt::format("{:#x}", info.pgm_hash));
 
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
@@ -3,7 +3,6 @@
 
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/ir/reg.h"
-#include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Gcn {
@@ -12,29 +11,29 @@ void Translator::EmitDataShare(const GcnInst& inst) {
     switch (inst.opcode) {
         // DS
     case Opcode::DS_ADD_U32:
-        return DS_ADD_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Add, false);
     case Opcode::DS_ADD_U64:
-        return DS_ADD_U64(inst, false);
+        return DS_OP<IR::U64>(inst, AtomicOp::Add, false);
     case Opcode::DS_SUB_U32:
-        return DS_SUB_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Sub, false);
     case Opcode::DS_INC_U32:
-        return DS_INC_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Inc, false);
     case Opcode::DS_DEC_U32:
-        return DS_DEC_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Dec, false);
     case Opcode::DS_MIN_I32:
-        return DS_MIN_U32(inst, true, false);
+        return DS_OP(inst, AtomicOp::Smin, false);
     case Opcode::DS_MAX_I32:
-        return DS_MAX_U32(inst, true, false);
+        return DS_OP(inst, AtomicOp::Smax, false);
     case Opcode::DS_MIN_U32:
-        return DS_MIN_U32(inst, false, false);
+        return DS_OP(inst, AtomicOp::Umin, false);
     case Opcode::DS_MAX_U32:
-        return DS_MAX_U32(inst, false, false);
+        return DS_OP(inst, AtomicOp::Umax, false);
     case Opcode::DS_AND_B32:
-        return DS_AND_B32(inst, false);
+        return DS_OP(inst, AtomicOp::And, false);
     case Opcode::DS_OR_B32:
-        return DS_OR_B32(inst, false);
+        return DS_OP(inst, AtomicOp::Or, false);
     case Opcode::DS_XOR_B32:
-        return DS_XOR_B32(inst, false);
+        return DS_OP(inst, AtomicOp::Xor, false);
     case Opcode::DS_WRITE_B32:
         return DS_WRITE(32, false, false, false, inst);
     case Opcode::DS_WRITE2_B32:
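All single-address DS forms in this switch combine the instruction's two 8-bit offset fields into one 16-bit byte offset, (offset1 << 8) + offset0, before adding it to the lane address. A quick standalone check of that encoding:

#include <cassert>
#include <cstdint>

constexpr std::uint32_t DsOffset(std::uint8_t offset1, std::uint8_t offset0) {
    return (std::uint32_t(offset1) << 8u) + std::uint32_t(offset0);
}

int main() {
    assert(DsOffset(0x01, 0x20) == 0x120);
    assert(DsOffset(0xFF, 0xFF) == 0xFFFF); // largest encodable immediate offset
}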
@@ -42,19 +41,19 @@ void Translator::EmitDataShare(const GcnInst& inst) {
     case Opcode::DS_WRITE2ST64_B32:
         return DS_WRITE(32, false, true, true, inst);
     case Opcode::DS_ADD_RTN_U32:
-        return DS_ADD_U32(inst, true);
+        return DS_OP(inst, AtomicOp::Add, true);
     case Opcode::DS_SUB_RTN_U32:
-        return DS_SUB_U32(inst, true);
+        return DS_OP(inst, AtomicOp::Sub, true);
     case Opcode::DS_MIN_RTN_U32:
-        return DS_MIN_U32(inst, false, true);
+        return DS_OP(inst, AtomicOp::Umin, true);
     case Opcode::DS_MAX_RTN_U32:
-        return DS_MAX_U32(inst, false, true);
+        return DS_OP(inst, AtomicOp::Umax, true);
     case Opcode::DS_AND_RTN_B32:
-        return DS_AND_B32(inst, true);
+        return DS_OP(inst, AtomicOp::And, true);
     case Opcode::DS_OR_RTN_B32:
-        return DS_OR_B32(inst, true);
+        return DS_OP(inst, AtomicOp::Or, true);
     case Opcode::DS_XOR_RTN_B32:
-        return DS_XOR_B32(inst, true);
+        return DS_OP(inst, AtomicOp::Xor, true);
     case Opcode::DS_SWIZZLE_B32:
         return DS_SWIZZLE_B32(inst);
     case Opcode::DS_READ_B32:
|
||||||
|
|
||||||
// DS
|
// DS
|
||||||
|
|
||||||
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
template <typename T>
|
||||||
|
void Translator::DS_OP(const GcnInst& inst, AtomicOp op, bool rtn) {
|
||||||
|
const bool is_gds = inst.control.ds.gds;
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
const T data = [&] {
|
||||||
|
if (op == AtomicOp::Inc || op == AtomicOp::Dec) {
|
||||||
|
return T{};
|
||||||
|
}
|
||||||
|
if constexpr (std::is_same_v<T, IR::U32>) {
|
||||||
|
return GetSrc(inst.src[1]);
|
||||||
|
} else {
|
||||||
|
return GetSrc64(inst.src[1]);
|
||||||
|
}
|
||||||
|
}();
|
||||||
const IR::U32 offset =
|
const IR::U32 offset =
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
const T original_val = [&] -> T {
|
||||||
|
switch (op) {
|
||||||
|
case AtomicOp::Add:
|
||||||
|
return ir.SharedAtomicIAdd(addr_offset, data, is_gds);
|
||||||
|
case AtomicOp::Umin:
|
||||||
|
return ir.SharedAtomicIMin(addr_offset, data, false, is_gds);
|
||||||
|
case AtomicOp::Smin:
|
||||||
|
return ir.SharedAtomicIMin(addr_offset, data, true, is_gds);
|
||||||
|
case AtomicOp::Umax:
|
||||||
|
return ir.SharedAtomicIMax(addr_offset, data, false, is_gds);
|
||||||
|
case AtomicOp::Smax:
|
||||||
|
return ir.SharedAtomicIMax(addr_offset, data, true, is_gds);
|
||||||
|
case AtomicOp::And:
|
||||||
|
return ir.SharedAtomicAnd(addr_offset, data, is_gds);
|
||||||
|
case AtomicOp::Or:
|
||||||
|
return ir.SharedAtomicOr(addr_offset, data, is_gds);
|
||||||
|
case AtomicOp::Xor:
|
||||||
|
return ir.SharedAtomicXor(addr_offset, data, is_gds);
|
||||||
|
case AtomicOp::Sub:
|
||||||
|
return ir.SharedAtomicISub(addr_offset, data, is_gds);
|
||||||
|
case AtomicOp::Inc:
|
||||||
|
return ir.SharedAtomicInc<T>(addr_offset, is_gds);
|
||||||
|
case AtomicOp::Dec:
|
||||||
|
return ir.SharedAtomicDec<T>(addr_offset, is_gds);
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}();
|
||||||
if (rtn) {
|
if (rtn) {
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
if constexpr (std::is_same_v<T, IR::U32>) {
|
||||||
}
|
SetDst(inst.dst[0], original_val);
|
||||||
}
|
} else {
|
||||||
|
SetDst64(inst.dst[0], original_val);
|
||||||
void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
|
}
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
||||||
const IR::U64 data{GetSrc64(inst.src[1])};
|
|
||||||
const IR::U32 offset =
|
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
||||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
|
||||||
if (rtn) {
|
|
||||||
SetDst64(inst.dst[0], IR::U64{original_val});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
|
||||||
const IR::U32 offset =
|
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
||||||
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
|
|
||||||
if (rtn) {
|
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
|
||||||
const IR::U32 offset =
|
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
||||||
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
|
|
||||||
if (rtn) {
|
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) {
|
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
|
||||||
const IR::U32 offset =
|
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
||||||
const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data);
|
|
||||||
if (rtn) {
|
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) {
|
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
|
||||||
const IR::U32 offset =
|
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
||||||
const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data);
|
|
||||||
if (rtn) {
|
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) {
|
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
|
||||||
const IR::U32 offset =
|
|
||||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
||||||
const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data);
|
|
||||||
if (rtn) {
|
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
||||||
const GcnInst& inst) {
|
const GcnInst& inst) {
|
||||||
|
const bool is_gds = inst.control.ds.gds;
|
||||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||||
const IR::VectorReg data0{inst.src[1].code};
|
const IR::VectorReg data0{inst.src[1].code};
|
||||||
const IR::VectorReg data1{inst.src[2].code};
|
const IR::VectorReg data1{inst.src[2].code};
|
||||||
|
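DS_OP collapses eleven near-identical handlers into one template: the 32- versus 64-bit differences are settled at compile time with if constexpr, so each instantiation contains only the matching GetSrc/SetDst calls, and Inc/Dec simply skip reading a data operand. A toy model of the pattern with plain integer types (standalone, not the emulator's IR types):

#include <cstdint>
#include <iostream>
#include <type_traits>

template <typename T = std::uint32_t>
T LoadOperand() {
    // Only one branch survives per instantiation, as in DS_OP's data lambda.
    if constexpr (std::is_same_v<T, std::uint32_t>) {
        return 32u;   // stands in for GetSrc(inst.src[1])
    } else {
        return 64ull; // stands in for GetSrc64(inst.src[1])
    }
}

int main() {
    std::cout << LoadOperand() << ' ' << LoadOperand<std::uint64_t>() << '\n'; // 32 64
}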
@@ -220,85 +190,40 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
             ir.WriteShared(64,
                            ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
                                                                  ir.GetVectorReg(data0 + 1))),
-                           addr0);
+                           addr0, is_gds);
         } else if (bit_size == 32) {
-            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
+            ir.WriteShared(32, ir.GetVectorReg(data0), addr0, is_gds);
         } else if (bit_size == 16) {
-            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
+            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
         }
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
         if (bit_size == 64) {
             ir.WriteShared(64,
                            ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
                                                                  ir.GetVectorReg(data1 + 1))),
-                           addr1);
+                           addr1, is_gds);
         } else if (bit_size == 32) {
-            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
+            ir.WriteShared(32, ir.GetVectorReg(data1), addr1, is_gds);
         } else if (bit_size == 16) {
-            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1);
+            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1, is_gds);
         }
     } else {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         if (bit_size == 64) {
             const IR::Value data =
                 ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
-            ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
+            ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0, is_gds);
         } else if (bit_size == 32) {
-            ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
+            ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0, is_gds);
         } else if (bit_size == 16) {
-            ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
+            ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
         }
     }
 }
 
-void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
-    const u8 offset0 = inst.control.ds.offset0;
-    const u8 offset1 = inst.control.ds.offset1;
-    const IR::U32 src{GetSrc(inst.src[0])};
-    // ASSERT(offset1 & 0x80);
-    const IR::U32 lane_id = ir.LaneId();
-    const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
-    const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
-    const IR::U32 index = ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2));
-    SetDst(inst.dst[0], ir.QuadShuffle(src, index));
-}
-
-void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicInc(addr_offset);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicDec(addr_offset);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
 void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
                          const GcnInst& inst) {
+    const bool is_gds = inst.control.ds.gds;
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
     IR::VectorReg dst_reg{inst.dst[0].code};
     const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
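For the paired forms (DS_WRITE2*/DS_READ2*), offset0 and offset1 are element indices rather than byte offsets: they are scaled by the element size (4 or 8 bytes), multiplied by 64 for the ST64 variants. Worked examples of that adj scaling:

#include <cassert>
#include <cstdint>

// Byte address of one half of a DS_WRITE2/DS_READ2 pair, per the diff above.
constexpr std::uint32_t PairAddr(std::uint32_t base, std::uint32_t elem_idx,
                                 int bit_size, bool stride64) {
    const std::uint32_t adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
    return base + elem_idx * adj;
}

int main() {
    assert(PairAddr(0, 1, 32, false) == 4);   // DS_WRITE2_B32, offset1 = 1
    assert(PairAddr(0, 1, 32, true) == 256);  // DS_WRITE2ST64_B32, 64-element stride
    assert(PairAddr(16, 2, 64, false) == 32); // 64-bit pair, two elements past base 16
}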
@@ -312,7 +237,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
         // Pair loads are either 32 or 64-bit
         const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
-        const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
+        const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
         if (bit_size == 64) {
             const auto vector = ir.UnpackUint2x32(IR::U64{data0});
             ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
@@ -323,7 +248,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
             ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})});
         }
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
-        const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
+        const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1, is_gds);
         if (bit_size == 64) {
             const auto vector = ir.UnpackUint2x32(IR::U64{data1});
             ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
@@ -335,7 +260,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
         }
     } else {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
-        const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
+        const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
         if (bit_size == 64) {
             const auto vector = ir.UnpackUint2x32(IR::U64{data});
             ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
@@ -348,6 +273,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
     }
 }
 
+void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
+    const u8 offset0 = inst.control.ds.offset0;
+    const u8 offset1 = inst.control.ds.offset1;
+    const IR::U32 src{GetSrc(inst.src[0])};
+    ASSERT(offset1 & 0x80);
+    const IR::U32 lane_id = ir.LaneId();
+    const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
+    const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
+    const IR::U32 index = ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2));
+    SetDst(inst.dst[0], ir.QuadShuffle(src, index));
+}
+
 void Translator::DS_APPEND(const GcnInst& inst) {
     const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0;
     const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
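In the quad-permute mode asserted above (offset1 bit 0x80 set), offset0 packs a 2-bit source select for each of the four lanes in a quad: lane i reads the two bits at position 2*i. A plain-C++ emulation of that selection:

#include <cassert>
#include <cstdint>

// Source lane within a quad for DS_SWIZZLE_B32 quad-permute, mirroring the
// LaneId/BitFieldExtract sequence in the diff above.
constexpr std::uint32_t QuadSrcLane(std::uint32_t lane_id, std::uint8_t offset0) {
    const std::uint32_t id_in_group = lane_id & 0b11u;
    const std::uint32_t base = id_in_group << 1u; // two select bits per lane
    return (offset0 >> base) & 0b11u;
}

int main() {
    // offset0 = 0b00011011 encodes selects {3, 2, 1, 0}: a full quad reversal.
    assert(QuadSrcLane(0, 0b00011011) == 3);
    assert(QuadSrcLane(1, 0b00011011) == 2);
    assert(QuadSrcLane(2, 0b00011011) == 1);
    assert(QuadSrcLane(3, 0b00011011) == 0);
}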
@@ -270,21 +270,13 @@ public:
 
     // Data share
     // DS
-    void DS_ADD_U32(const GcnInst& inst, bool rtn);
-    void DS_ADD_U64(const GcnInst& inst, bool rtn);
-    void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
-    void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
+    template <typename T = IR::U32>
+    void DS_OP(const GcnInst& inst, AtomicOp op, bool rtn);
     void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
-    void DS_SWIZZLE_B32(const GcnInst& inst);
-    void DS_AND_B32(const GcnInst& inst, bool rtn);
-    void DS_OR_B32(const GcnInst& inst, bool rtn);
-    void DS_XOR_B32(const GcnInst& inst, bool rtn);
     void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
+    void DS_SWIZZLE_B32(const GcnInst& inst);
     void DS_APPEND(const GcnInst& inst);
     void DS_CONSUME(const GcnInst& inst);
-    void DS_SUB_U32(const GcnInst& inst, bool rtn);
-    void DS_INC_U32(const GcnInst& inst, bool rtn);
-    void DS_DEC_U32(const GcnInst& inst, bool rtn);
 
     // Buffer Memory
     // MUBUF / MTBUF
@@ -565,7 +565,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
     }
     // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
     if ((inst.src[0].field == OperandField::ExecHi ||
-         inst.src[0].field == OperandField::VccHi) &&
+         inst.src[0].field == OperandField::VccHi ||
+         inst.src[0].field == OperandField::ScalarGPR) &&
        (inst.src[1].field == OperandField::ConstZero ||
         inst.src[1].field == OperandField::VectorGPR)) {
        return SetDst(inst.dst[0], GetSrc(inst.src[1]));
|
||||||
}
|
}
|
||||||
// v_mbcnt_lo_u32_b32 vY, exec_lo, vX
|
// v_mbcnt_lo_u32_b32 vY, exec_lo, vX
|
||||||
// used combined with above for append buffer indexing.
|
// used combined with above for append buffer indexing.
|
||||||
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
|
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo ||
|
||||||
|
inst.src[0].field == OperandField::ScalarGPR) {
|
||||||
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
|
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
|
||||||
}
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
|
|
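Context for the relaxed operand matching above: a v_mbcnt_lo/v_mbcnt_hi pair over the exec mask yields, per lane, the number of active lanes below it — a compacted per-lane index that games use for append-buffer addressing. The translator pattern-matches this idiom instead of computing it; the sketch below shows what the full pair would compute (an illustration of the idiom, not the translator's code):

#include <bit>
#include <cassert>
#include <cstdint>

// Number of set exec bits strictly below `lane` (what mbcnt_lo + mbcnt_hi
// accumulate when fed exec_lo/exec_hi and a zero initial value).
std::uint32_t MbcntPair(std::uint64_t exec, std::uint32_t lane) {
    const std::uint64_t below = (lane == 0) ? 0 : (exec & (~0ull >> (64 - lane)));
    return static_cast<std::uint32_t>(std::popcount(below));
}

int main() {
    const std::uint64_t exec = 0b1011; // lanes 0, 1 and 3 active
    assert(MbcntPair(exec, 0) == 0);
    assert(MbcntPair(exec, 1) == 1);
    assert(MbcntPair(exec, 3) == 2);
}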
@@ -291,78 +291,137 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
     Inst(Opcode::SetPatch, patch, value);
 }
 
-Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset, bool is_gds) {
     switch (bit_size) {
     case 16:
-        return Inst<U16>(Opcode::LoadSharedU16, offset);
+        return Inst<U16>(Opcode::LoadSharedU16, Flags{is_gds}, offset);
     case 32:
-        return Inst<U32>(Opcode::LoadSharedU32, offset);
+        return Inst<U32>(Opcode::LoadSharedU32, Flags{is_gds}, offset);
     case 64:
-        return Inst<U64>(Opcode::LoadSharedU64, offset);
+        return Inst<U64>(Opcode::LoadSharedU64, Flags{is_gds}, offset);
     default:
         UNREACHABLE_MSG("Invalid bit size {}", bit_size);
     }
 }
 
-void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
+void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds) {
     switch (bit_size) {
     case 16:
-        Inst(Opcode::WriteSharedU16, offset, value);
+        Inst(Opcode::WriteSharedU16, Flags{is_gds}, offset, value);
         break;
     case 32:
-        Inst(Opcode::WriteSharedU32, offset, value);
+        Inst(Opcode::WriteSharedU32, Flags{is_gds}, offset, value);
         break;
     case 64:
-        Inst(Opcode::WriteSharedU64, offset, value);
+        Inst(Opcode::WriteSharedU64, Flags{is_gds}, offset, value);
         break;
     default:
        UNREACHABLE_MSG("Invalid bit size {}", bit_size);
     }
 }
 
-U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
+U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds) {
     switch (data.Type()) {
     case Type::U32:
-        return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
+        return Inst<U32>(Opcode::SharedAtomicIAdd32, Flags{is_gds}, address, data);
     case Type::U64:
-        return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
+        return Inst<U64>(Opcode::SharedAtomicIAdd64, Flags{is_gds}, address, data);
     default:
         ThrowInvalidType(data.Type());
     }
 }
 
-U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
-    return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
-                     : Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
+U32U64 IREmitter::SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
+                                   bool is_gds) {
+    switch (data.Type()) {
+    case Type::U32:
+        return Inst<U32>(is_signed ? Opcode::SharedAtomicSMin32 : Opcode::SharedAtomicUMin32,
+                         Flags{is_gds}, address, data);
+    case Type::U64:
+        return Inst<U64>(is_signed ? Opcode::SharedAtomicSMin64 : Opcode::SharedAtomicUMin64,
+                         Flags{is_gds}, address, data);
+    default:
+        ThrowInvalidType(data.Type());
+    }
 }
 
-U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
-    return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
-                     : Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
+U32U64 IREmitter::SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
+                                   bool is_gds) {
+    switch (data.Type()) {
+    case Type::U32:
+        return Inst<U32>(is_signed ? Opcode::SharedAtomicSMax32 : Opcode::SharedAtomicUMax32,
+                         Flags{is_gds}, address, data);
+    case Type::U64:
+        return Inst<U64>(is_signed ? Opcode::SharedAtomicSMax64 : Opcode::SharedAtomicUMax64,
+                         Flags{is_gds}, address, data);
+    default:
+        ThrowInvalidType(data.Type());
+    }
 }
 
-U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) {
-    return Inst<U32>(Opcode::SharedAtomicAnd32, address, data);
+U32U64 IREmitter::SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds) {
+    switch (data.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
+    case Type::U64:
+        return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
+    default:
+        ThrowInvalidType(data.Type());
+    }
 }
 
-U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) {
-    return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
+U32U64 IREmitter::SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds) {
+    switch (data.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::SharedAtomicOr32, Flags{is_gds}, address, data);
+    case Type::U64:
+        return Inst<U64>(Opcode::SharedAtomicOr64, Flags{is_gds}, address, data);
+    default:
+        ThrowInvalidType(data.Type());
+    }
 }
 
-U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
-    return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
+U32U64 IREmitter::SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds) {
+    switch (data.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::SharedAtomicXor32, Flags{is_gds}, address, data);
+    case Type::U64:
+        return Inst<U64>(Opcode::SharedAtomicXor64, Flags{is_gds}, address, data);
+    default:
+        ThrowInvalidType(data.Type());
+    }
 }
 
-U32 IREmitter::SharedAtomicInc(const U32& address) {
-    return Inst<U32>(Opcode::SharedAtomicInc32, address);
-}
-
-U32 IREmitter::SharedAtomicDec(const U32& address) {
-    return Inst<U32>(Opcode::SharedAtomicDec32, address);
-}
-
-U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
-    return Inst<U32>(Opcode::SharedAtomicISub32, address, data);
+U32U64 IREmitter::SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds) {
+    switch (data.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::SharedAtomicISub32, Flags{is_gds}, address, data);
+    case Type::U64:
+        return Inst<U64>(Opcode::SharedAtomicISub64, Flags{is_gds}, address, data);
+    default:
+        ThrowInvalidType(data.Type());
+    }
+}
+
+template <>
+U32 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
+    return Inst<U32>(Opcode::SharedAtomicInc32, Flags{is_gds}, address);
+}
+
+template <>
+U64 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
+    return Inst<U64>(Opcode::SharedAtomicInc64, Flags{is_gds}, address);
+}
+
+template <>
+U32 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
+    return Inst<U32>(Opcode::SharedAtomicDec32, Flags{is_gds}, address);
+}
+
+template <>
+U64 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
+    return Inst<U64>(Opcode::SharedAtomicDec64, Flags{is_gds}, address);
 }
 
 U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
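The GDS bit travels with each IR instruction in its per-instruction flags word — written as Flags{is_gds} at emission and read back with inst.Flags<bool>() during resource tracking — rather than as an extra operand. A minimal model of round-tripping a bool through such a flags field (toy Inst type; the real IR::Inst stores flags similarly but is more involved):

#include <cassert>
#include <cstring>
#include <type_traits>

struct Inst {
    unsigned flags = 0; // raw per-instruction flags storage

    template <typename T>
    void SetFlags(T value) {
        static_assert(sizeof(T) <= sizeof(flags) && std::is_trivially_copyable_v<T>);
        std::memcpy(&flags, &value, sizeof(T));
    }

    template <typename T>
    T Flags() const {
        T value;
        std::memcpy(&value, &flags, sizeof(T));
        return value;
    }
};

int main() {
    Inst inst;
    inst.SetFlags(true);        // Flags{is_gds} at emission time
    assert(inst.Flags<bool>()); // read back by IsDataRingInstruction
}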
@@ -96,18 +96,24 @@ public:
     [[nodiscard]] F32 GetPatch(Patch patch);
     void SetPatch(Patch patch, const F32& value);
 
-    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
-    void WriteShared(int bit_size, const Value& value, const U32& offset);
-
-    [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
-    [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
-    [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
-    [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
-    [[nodiscard]] U32 SharedAtomicInc(const U32& address);
-    [[nodiscard]] U32 SharedAtomicDec(const U32& address);
-    [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
-    [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
-    [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
+    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset,
+                                   bool is_gds = false);
+    void WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds = false);
+
+    [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds);
+    [[nodiscard]] U32U64 SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds);
+    [[nodiscard]] U32U64 SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
+                                          bool is_gds);
+    [[nodiscard]] U32U64 SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
+                                          bool is_gds);
+    [[nodiscard]] U32U64 SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds);
+    [[nodiscard]] U32U64 SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds);
+    [[nodiscard]] U32U64 SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds);
+
+    template <typename T = U32>
+    [[nodiscard]] T SharedAtomicInc(const U32& address, bool is_gds);
+    template <typename T = U32>
+    [[nodiscard]] T SharedAtomicDec(const U32& address, bool is_gds);
 
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
     [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
@@ -41,15 +41,25 @@ OPCODE(WriteSharedU64, Void, U32,
 OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
 OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
 OPCODE(SharedAtomicISub32, U32, U32, U32, )
+OPCODE(SharedAtomicISub64, U64, U32, U64, )
 OPCODE(SharedAtomicSMin32, U32, U32, U32, )
+OPCODE(SharedAtomicSMin64, U64, U32, U64, )
 OPCODE(SharedAtomicUMin32, U32, U32, U32, )
+OPCODE(SharedAtomicUMin64, U64, U32, U64, )
 OPCODE(SharedAtomicSMax32, U32, U32, U32, )
+OPCODE(SharedAtomicSMax64, U64, U32, U64, )
 OPCODE(SharedAtomicUMax32, U32, U32, U32, )
+OPCODE(SharedAtomicUMax64, U64, U32, U64, )
 OPCODE(SharedAtomicInc32, U32, U32, )
+OPCODE(SharedAtomicInc64, U64, U32, )
 OPCODE(SharedAtomicDec32, U32, U32, )
+OPCODE(SharedAtomicDec64, U64, U32, )
 OPCODE(SharedAtomicAnd32, U32, U32, U32, )
+OPCODE(SharedAtomicAnd64, U64, U32, U64, )
 OPCODE(SharedAtomicOr32, U32, U32, U32, )
+OPCODE(SharedAtomicOr64, U64, U32, U64, )
 OPCODE(SharedAtomicXor32, U32, U32, U32, )
+OPCODE(SharedAtomicXor64, U64, U32, U64, )
 
 // Context getters/setters
 OPCODE(GetUserData, U32, ScalarReg, )
@@ -84,8 +84,42 @@ bool IsBufferInstruction(const IR::Inst& inst) {
 }
 
 bool IsDataRingInstruction(const IR::Inst& inst) {
-    return inst.GetOpcode() == IR::Opcode::DataAppend ||
-           inst.GetOpcode() == IR::Opcode::DataConsume;
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::DataAppend:
+    case IR::Opcode::DataConsume:
+        return true;
+    case IR::Opcode::LoadSharedU16:
+    case IR::Opcode::LoadSharedU32:
+    case IR::Opcode::LoadSharedU64:
+    case IR::Opcode::WriteSharedU16:
+    case IR::Opcode::WriteSharedU32:
+    case IR::Opcode::WriteSharedU64:
+    case IR::Opcode::SharedAtomicIAdd32:
+    case IR::Opcode::SharedAtomicIAdd64:
+    case IR::Opcode::SharedAtomicUMin32:
+    case IR::Opcode::SharedAtomicUMin64:
+    case IR::Opcode::SharedAtomicSMin32:
+    case IR::Opcode::SharedAtomicSMin64:
+    case IR::Opcode::SharedAtomicUMax32:
+    case IR::Opcode::SharedAtomicUMax64:
+    case IR::Opcode::SharedAtomicSMax32:
+    case IR::Opcode::SharedAtomicSMax64:
+    case IR::Opcode::SharedAtomicAnd32:
+    case IR::Opcode::SharedAtomicAnd64:
+    case IR::Opcode::SharedAtomicOr32:
+    case IR::Opcode::SharedAtomicOr64:
+    case IR::Opcode::SharedAtomicXor32:
+    case IR::Opcode::SharedAtomicXor64:
+    case IR::Opcode::SharedAtomicISub32:
+    case IR::Opcode::SharedAtomicISub64:
+    case IR::Opcode::SharedAtomicInc32:
+    case IR::Opcode::SharedAtomicInc64:
+    case IR::Opcode::SharedAtomicDec32:
+    case IR::Opcode::SharedAtomicDec64:
+        return inst.Flags<bool>(); // is_gds
+    default:
+        return false;
+    }
 }
 
 IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
@@ -507,7 +541,8 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     }
 }
 
-void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
+void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
+                                Descriptors& descriptors) {
     const u32 binding = descriptors.Add(BufferResource{
         .used_types = IR::Type::U32,
         .inline_cbuf = AmdGpu::Buffer::Null(),
@@ -515,37 +550,111 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
         .is_written = true,
     });
 
-    const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
-        if (inst->GetOpcode() == IR::Opcode::GetUserData) {
-            return inst;
-        }
-        return std::nullopt;
-    };
-
-    // Attempt to deduce the GDS address of counter at compile time.
-    u32 gds_addr = 0;
-    const IR::Value& gds_offset = inst.Arg(0);
-    if (gds_offset.IsImmediate()) {
-        // Nothing to do, offset is known.
-        gds_addr = gds_offset.U32() & 0xFFFF;
-    } else {
-        const auto result = IR::BreadthFirstSearch(&inst, pred);
-        ASSERT_MSG(result, "Unable to track M0 source");
-
-        // M0 must be set by some user data register.
-        const IR::Inst* prod = gds_offset.InstRecursive();
-        const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
-        u32 m0_val = info.user_data[ud_reg] >> 16;
-        if (prod->GetOpcode() == IR::Opcode::IAdd32) {
-            m0_val += prod->Arg(1).U32();
-        }
-        gds_addr = m0_val & 0xFFFF;
-    }
-
-    // Patch instruction.
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
-    inst.SetArg(0, ir.Imm32(gds_addr >> 2));
-    inst.SetArg(1, ir.Imm32(binding));
+
+    // For data append/consume operations attempt to deduce the GDS address.
+    if (inst.GetOpcode() == IR::Opcode::DataAppend || inst.GetOpcode() == IR::Opcode::DataConsume) {
+        const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
+            if (inst->GetOpcode() == IR::Opcode::GetUserData) {
+                return inst;
+            }
+            return std::nullopt;
+        };
+
+        u32 gds_addr = 0;
+        const IR::Value& gds_offset = inst.Arg(0);
+        if (gds_offset.IsImmediate()) {
+            // Nothing to do, offset is known.
+            gds_addr = gds_offset.U32() & 0xFFFF;
+        } else {
+            const auto result = IR::BreadthFirstSearch(&inst, pred);
+            ASSERT_MSG(result, "Unable to track M0 source");
+
+            // M0 must be set by some user data register.
+            const IR::Inst* prod = gds_offset.InstRecursive();
+            const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
+            u32 m0_val = info.user_data[ud_reg] >> 16;
+            if (prod->GetOpcode() == IR::Opcode::IAdd32) {
+                m0_val += prod->Arg(1).U32();
+            }
+            gds_addr = m0_val & 0xFFFF;
+        }
+
+        // Patch instruction.
+        inst.SetArg(0, ir.Imm32(gds_addr >> 2));
+        inst.SetArg(1, ir.Imm32(binding));
+    } else {
+        // Convert shared memory opcode to storage buffer atomic to GDS buffer.
+        const IR::U32 offset = IR::U32{inst.Arg(0)};
+        const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
+        const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
+        const IR::U32 address_qwords = ir.ShiftRightLogical(offset, ir.Imm32(3));
+        const IR::U32 handle = ir.Imm32(binding);
+        switch (inst.GetOpcode()) {
+        case IR::Opcode::SharedAtomicIAdd32:
+            inst.ReplaceUsesWith(ir.BufferAtomicIAdd(handle, address_dwords, inst.Arg(1), {}));
+            break;
+        case IR::Opcode::SharedAtomicIAdd64:
+            inst.ReplaceUsesWith(
+                ir.BufferAtomicIAdd(handle, address_qwords, IR::U64{inst.Arg(1)}, {}));
+            break;
+        case IR::Opcode::SharedAtomicISub32:
+            inst.ReplaceUsesWith(ir.BufferAtomicISub(handle, address_dwords, inst.Arg(1), {}));
+            break;
+        case IR::Opcode::SharedAtomicSMin32:
+        case IR::Opcode::SharedAtomicUMin32: {
+            const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
+            inst.ReplaceUsesWith(
+                ir.BufferAtomicIMin(handle, address_dwords, inst.Arg(1), is_signed, {}));
+            break;
+        }
+        case IR::Opcode::SharedAtomicSMax32:
+        case IR::Opcode::SharedAtomicUMax32: {
+            const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
+            inst.ReplaceUsesWith(
+                ir.BufferAtomicIMax(handle, address_dwords, inst.Arg(1), is_signed, {}));
+            break;
+        }
+        case IR::Opcode::SharedAtomicInc32:
+            inst.ReplaceUsesWith(ir.BufferAtomicInc(handle, address_dwords, {}));
+            break;
+        case IR::Opcode::SharedAtomicDec32:
+            inst.ReplaceUsesWith(ir.BufferAtomicDec(handle, address_dwords, {}));
+            break;
+        case IR::Opcode::SharedAtomicAnd32:
+            inst.ReplaceUsesWith(ir.BufferAtomicAnd(handle, address_dwords, inst.Arg(1), {}));
+            break;
+        case IR::Opcode::SharedAtomicOr32:
+            inst.ReplaceUsesWith(ir.BufferAtomicOr(handle, address_dwords, inst.Arg(1), {}));
+            break;
+        case IR::Opcode::SharedAtomicXor32:
+            inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
+            break;
+        case IR::Opcode::LoadSharedU16:
+            inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
+            break;
+        case IR::Opcode::LoadSharedU32:
+            inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
+            break;
+        case IR::Opcode::LoadSharedU64:
+            inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
+            break;
+        case IR::Opcode::WriteSharedU16:
+            ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
+            inst.Invalidate();
+            break;
+        case IR::Opcode::WriteSharedU32:
+            ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
+            inst.Invalidate();
+            break;
+        case IR::Opcode::WriteSharedU64:
+            ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
+            inst.Invalidate();
+            break;
+        default:
+            UNREACHABLE();
+        }
+    }
 }
 
 IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
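When a GDS-flagged shared-memory op is rewritten into a buffer access above, the incoming byte offset is rescaled to the element width of the replacement: >> 1 for 16-bit words, >> 2 for 32-bit dwords, >> 3 for 64-bit qwords; the append/consume path likewise turns its 16-bit counter address into a dword index with gds_addr >> 2. A standalone check of that correspondence:

#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t byte_offset = 24;
    assert((byte_offset >> 1) == 12); // index in 16-bit words
    assert((byte_offset >> 2) == 6);  // index in 32-bit dwords
    assert((byte_offset >> 3) == 3);  // index in 64-bit qwords
}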
@@ -916,8 +1025,6 @@ void ResourceTrackingPass(IR::Program& program) {
                 PatchBufferSharp(*block, inst, info, descriptors);
             } else if (IsImageInstruction(inst)) {
                 PatchImageSharp(*block, inst, info, descriptors);
-            } else if (IsDataRingInstruction(inst)) {
-                PatchDataRingAccess(*block, inst, info, descriptors);
             }
         }
     }
@@ -929,6 +1036,8 @@ void ResourceTrackingPass(IR::Program& program) {
                 PatchBufferArgs(*block, inst, info);
             } else if (IsImageInstruction(inst)) {
                 PatchImageArgs(*block, inst, info);
+            } else if (IsDataRingInstruction(inst)) {
+                PatchGlobalDataShareAccess(*block, inst, info, descriptors);
             }
         }
     }
@@ -48,6 +48,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
 
     memory_tracker = std::make_unique<MemoryTracker>(tracker);
 
+    std::memset(gds_buffer.mapped_data.data(), 0, DataShareBufferSize);
+
     // Ensure the first slot is used for the null buffer
     const auto null_id =
         slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);
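Zeroing the mapped GDS staging buffer at construction gives append/consume counters and data-share slots a defined starting state (the 16-bit address masks earlier in the diff suggest a 64 KiB window, though the exact DataShareBufferSize is defined elsewhere in the tree). The same idea with a plain host allocation:

#include <cstring>
#include <vector>

int main() {
    // Stand-in for the persistently mapped GDS buffer; the size is illustrative.
    std::vector<unsigned char> gds(64 * 1024, 0xAA);
    std::memset(gds.data(), 0, gds.size()); // counters start at zero
    return gds[0]; // 0
}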