Merge branch 'main' into shader_recompiler/format

This commit is contained in:
georgemoralis 2024-08-29 10:18:12 +03:00 committed by GitHub
commit 18e95ae4c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
138 changed files with 24513 additions and 1242 deletions

View file

@ -179,6 +179,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
spv::ExecutionModel execution_model{};
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::ImageQuery);
if (info.uses_fp16) {
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);

View file

@ -102,7 +102,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor);
}
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
}

View file

@ -305,7 +305,7 @@ static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
case AmdGpu::NumberFormat::Float:
return value;
default:
UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
UNREACHABLE_MSG("Unsupported number format for conversion: {}",
magic_enum::enum_name(format));
}
}
@ -478,7 +478,7 @@ static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat fo
case AmdGpu::NumberFormat::Float:
return value;
default:
UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
UNREACHABLE_MSG("Unsupported number format for conversion: {}",
magic_enum::enum_name(format));
}
}

View file

@ -91,7 +91,7 @@ Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@ -286,6 +286,7 @@ Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);

View file

@ -139,6 +139,13 @@ Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return result;
}
// Emits a bitwise AND of `a` and `b` typed as ctx.U64, forwards the result to
// SetZeroFlag and SetSignFlag for `inst`, and returns the result id.
Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id anded = ctx.OpBitwiseAnd(ctx.U64, a, b);
    SetZeroFlag(ctx, inst, anded);
    SetSignFlag(ctx, inst, anded);
    return anded;
}
Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
SetZeroFlag(ctx, inst, result);

View file

@ -405,6 +405,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rg16f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm) {
return spv::ImageFormat::Rg16Snorm;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::Rg8;

View file

@ -21,8 +21,13 @@ struct Compare {
}
};
static IR::Condition MakeCondition(Opcode opcode) {
switch (opcode) {
static IR::Condition MakeCondition(const GcnInst& inst) {
if (inst.IsCmpx()) {
ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
return IR::Condition::Execnz;
}
switch (inst.opcode) {
case Opcode::S_CBRANCH_SCC0:
return IR::Condition::Scc0;
case Opcode::S_CBRANCH_SCC1:
@ -37,7 +42,6 @@ static IR::Condition MakeCondition(Opcode opcode) {
return IR::Condition::Execnz;
case Opcode::S_AND_SAVEEXEC_B64:
case Opcode::S_ANDN2_B64:
case Opcode::V_CMPX_NE_U32:
return IR::Condition::Execnz;
default:
return IR::Condition::True;
@ -94,7 +98,8 @@ void CFG::EmitDivergenceLabels() {
// While this instruction does not save EXEC it is often used paired
// with SAVEEXEC to mask the threads that didn't pass the condition
// of initial branch.
inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32;
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
inst.opcode == Opcode::V_CMPX_NE_U32;
};
const auto is_close_scope = [](const GcnInst& inst) {
// Closing an EXEC scope can be either a branch instruction
@ -104,7 +109,8 @@ void CFG::EmitDivergenceLabels() {
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
// Those instructions need to be wrapped in the condition as well so allow branch
// as end scope instruction.
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64;
inst.opcode == Opcode::S_CBRANCH_EXECZ ||
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
};
// Since we will be adding new labels, avoid iterating those as well.
@ -171,7 +177,7 @@ void CFG::EmitBlocks() {
block->begin_index = GetIndex(start);
block->end_index = end_index;
block->end_inst = end_inst;
block->cond = MakeCondition(end_inst.opcode);
block->cond = MakeCondition(end_inst);
blocks.insert(*block);
}
}

View file

@ -47,4 +47,18 @@ bool GcnInst::IsConditionalBranch() const {
return false;
}
bool GcnInst::IsCmpx() const {
if ((opcode >= Opcode::V_CMPX_F_F32 && opcode <= Opcode::V_CMPX_T_F32) ||
(opcode >= Opcode::V_CMPX_F_F64 && opcode <= Opcode::V_CMPX_T_F64) ||
(opcode >= Opcode::V_CMPSX_F_F32 && opcode <= Opcode::V_CMPSX_T_F32) ||
(opcode >= Opcode::V_CMPSX_F_F64 && opcode <= Opcode::V_CMPSX_T_F64) ||
(opcode >= Opcode::V_CMPX_F_I32 && opcode <= Opcode::V_CMPX_CLASS_F32) ||
(opcode >= Opcode::V_CMPX_F_I64 && opcode <= Opcode::V_CMPX_CLASS_F64) ||
(opcode >= Opcode::V_CMPX_F_U32 && opcode <= Opcode::V_CMPX_T_U32) ||
(opcode >= Opcode::V_CMPX_F_U64 && opcode <= Opcode::V_CMPX_T_U64)) {
return true;
}
return false;
}
} // namespace Shader::Gcn

View file

@ -203,6 +203,7 @@ struct GcnInst {
bool IsUnconditionalBranch() const;
bool IsConditionalBranch() const;
bool IsFork() const;
bool IsCmpx() const;
};
} // namespace Shader::Gcn

View file

@ -3,6 +3,7 @@
#include <algorithm>
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>

View file

@ -6,96 +6,150 @@
namespace Shader::Gcn {
// Dispatches a scalar ALU instruction to its translator routine.
// SOPC- and SOPK-encoded instructions are forwarded to EmitSOPC / EmitSOPK;
// every other encoding is matched on the opcode directly.
void Translator::EmitScalarAlu(const GcnInst& inst) {
switch (inst.encoding) {
case InstEncoding::SOPC: {
EmitSOPC(inst);
break;
}
case InstEncoding::SOPK: {
EmitSOPK(inst);
break;
}
default:
// Remaining encodings: select the handler by opcode.
switch (inst.opcode) {
case Opcode::S_MOV_B32:
return S_MOV(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
return S_AND_SAVEEXEC_B64(inst);
case Opcode::S_MOV_B64:
return S_MOV_B64(inst);
// OR / NOR / XOR / ORN2 all share S_OR_B64; the NegateMode argument differs
// per opcode and the bool argument is true only for S_XOR_B64.
case Opcode::S_OR_B64:
return S_OR_B64(NegateMode::None, false, inst);
case Opcode::S_NOR_B64:
return S_OR_B64(NegateMode::Result, false, inst);
case Opcode::S_XOR_B64:
return S_OR_B64(NegateMode::None, true, inst);
case Opcode::S_ORN2_B64:
return S_OR_B64(NegateMode::Src1, false, inst);
// AND / NAND / ANDN2 share S_AND_B64 and differ only in the NegateMode passed.
case Opcode::S_AND_B64:
return S_AND_B64(NegateMode::None, inst);
case Opcode::S_NAND_B64:
return S_AND_B64(NegateMode::Result, inst);
case Opcode::S_ANDN2_B64:
return S_AND_B64(NegateMode::Src1, inst);
case Opcode::S_NOT_B64:
return S_NOT_B64(inst);
case Opcode::S_ADD_I32:
return S_ADD_I32(inst);
case Opcode::S_AND_B32:
return S_AND_B32(inst);
case Opcode::S_ASHR_I32:
return S_ASHR_I32(inst);
case Opcode::S_OR_B32:
return S_OR_B32(inst);
case Opcode::S_LSHL_B32:
return S_LSHL_B32(inst);
case Opcode::S_LSHR_B32:
return S_LSHR_B32(inst);
case Opcode::S_CSELECT_B32:
return S_CSELECT_B32(inst);
case Opcode::S_CSELECT_B64:
return S_CSELECT_B64(inst);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
case Opcode::S_BFM_B32:
return S_BFM_B32(inst);
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_ADD_U32:
return S_ADD_U32(inst);
case Opcode::S_ADDC_U32:
return S_ADDC_U32(inst);
// S_SUB_I32 is routed to the same handler as S_SUB_U32.
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
return S_SUB_U32(inst);
case Opcode::S_MIN_U32:
return S_MIN_U32(inst);
case Opcode::S_MAX_U32:
return S_MAX_U32(inst);
// S_WQM_B64 is accepted but nothing is emitted for it here.
case Opcode::S_WQM_B64:
break;
// Any opcode not listed above is reported through LogMissingOpcode.
default:
LogMissingOpcode(inst);
}
break;
}
}
void Translator::EmitSOPC(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
return S_MOVK(inst);
case Opcode::S_MOV_B32:
return S_MOV(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
return S_AND_SAVEEXEC_B64(inst);
case Opcode::S_MOV_B64:
return S_MOV_B64(inst);
case Opcode::S_CMP_LT_U32:
return S_CMP(ConditionOp::LT, false, inst);
case Opcode::S_CMP_LE_U32:
return S_CMP(ConditionOp::LE, false, inst);
case Opcode::S_CMP_LG_U32:
return S_CMP(ConditionOp::LG, false, inst);
case Opcode::S_CMP_LT_I32:
return S_CMP(ConditionOp::LT, true, inst);
case Opcode::S_CMP_EQ_I32:
return S_CMP(ConditionOp::EQ, true, inst);
case Opcode::S_CMP_LG_I32:
return S_CMP(ConditionOp::LG, true, inst);
case Opcode::S_CMP_GT_I32:
return S_CMP(ConditionOp::GT, true, inst);
case Opcode::S_CMP_LE_I32:
return S_CMP(ConditionOp::LE, true, inst);
case Opcode::S_CMP_GE_I32:
return S_CMP(ConditionOp::GE, true, inst);
case Opcode::S_CMP_EQ_I32:
return S_CMP(ConditionOp::EQ, true, inst);
case Opcode::S_CMP_LT_I32:
return S_CMP(ConditionOp::LT, true, inst);
case Opcode::S_CMP_LE_I32:
return S_CMP(ConditionOp::LE, true, inst);
case Opcode::S_CMP_EQ_U32:
return S_CMP(ConditionOp::EQ, false, inst);
case Opcode::S_CMP_GE_U32:
return S_CMP(ConditionOp::GE, false, inst);
case Opcode::S_CMP_LG_U32:
return S_CMP(ConditionOp::LG, false, inst);
case Opcode::S_CMP_GT_U32:
return S_CMP(ConditionOp::GT, false, inst);
case Opcode::S_OR_B64:
return S_OR_B64(NegateMode::None, false, inst);
case Opcode::S_NOR_B64:
return S_OR_B64(NegateMode::Result, false, inst);
case Opcode::S_XOR_B64:
return S_OR_B64(NegateMode::None, true, inst);
case Opcode::S_ORN2_B64:
return S_OR_B64(NegateMode::Src1, false, inst);
case Opcode::S_AND_B64:
return S_AND_B64(NegateMode::None, inst);
case Opcode::S_NAND_B64:
return S_AND_B64(NegateMode::Result, inst);
case Opcode::S_ANDN2_B64:
return S_AND_B64(NegateMode::Src1, inst);
case Opcode::S_NOT_B64:
return S_NOT_B64(inst);
case Opcode::S_ADD_I32:
return S_ADD_I32(inst);
case Opcode::S_AND_B32:
return S_AND_B32(inst);
case Opcode::S_ASHR_I32:
return S_ASHR_I32(inst);
case Opcode::S_OR_B32:
return S_OR_B32(inst);
case Opcode::S_LSHL_B32:
return S_LSHL_B32(inst);
case Opcode::S_LSHR_B32:
return S_LSHR_B32(inst);
case Opcode::S_CSELECT_B32:
return S_CSELECT_B32(inst);
case Opcode::S_CSELECT_B64:
return S_CSELECT_B64(inst);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
case Opcode::S_BFM_B32:
return S_BFM_B32(inst);
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_ADD_U32:
return S_ADD_U32(inst);
case Opcode::S_ADDC_U32:
return S_ADDC_U32(inst);
case Opcode::S_CMP_GE_U32:
return S_CMP(ConditionOp::GE, false, inst);
case Opcode::S_CMP_LT_U32:
return S_CMP(ConditionOp::LT, false, inst);
case Opcode::S_CMP_LE_U32:
return S_CMP(ConditionOp::LE, false, inst);
default:
LogMissingOpcode(inst);
}
}
void Translator::EmitSOPK(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
return S_MOVK(inst);
case Opcode::S_CMPK_EQ_I32:
return S_CMPK(ConditionOp::EQ, true, inst);
case Opcode::S_CMPK_LG_I32:
return S_CMPK(ConditionOp::LG, true, inst);
case Opcode::S_CMPK_GT_I32:
return S_CMPK(ConditionOp::GT, true, inst);
case Opcode::S_CMPK_GE_I32:
return S_CMPK(ConditionOp::GE, true, inst);
case Opcode::S_CMPK_LT_I32:
return S_CMPK(ConditionOp::LT, true, inst);
case Opcode::S_CMPK_LE_I32:
return S_CMPK(ConditionOp::LE, true, inst);
case Opcode::S_CMPK_EQ_U32:
return S_CMPK(ConditionOp::EQ, false, inst);
case Opcode::S_CMPK_LG_U32:
return S_CMPK(ConditionOp::LG, false, inst);
case Opcode::S_CMPK_GT_U32:
return S_CMPK(ConditionOp::GT, false, inst);
case Opcode::S_CMPK_GE_U32:
return S_CMPK(ConditionOp::GE, false, inst);
case Opcode::S_CMPK_LT_U32:
return S_CMPK(ConditionOp::LT, false, inst);
case Opcode::S_CMPK_LE_U32:
return S_CMPK(ConditionOp::LE, false, inst);
case Opcode::S_ADDK_I32:
return S_ADDK_I32(inst);
case Opcode::S_MULK_I32:
return S_MULK_I32(inst);
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
return S_SUB_U32(inst);
case Opcode::S_MIN_U32:
return S_MIN_U32(inst);
case Opcode::S_MAX_U32:
return S_MAX_U32(inst);
case Opcode::S_WQM_B64:
break;
default:
LogMissingOpcode(inst);
}
@ -152,6 +206,31 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
ir.SetScc(result);
}
// Translates an S_CMPK_* compare: the value read from the instruction's dst[0]
// operand is compared against the SOPK immediate using `cond`, and the boolean
// outcome is written to SCC. `is_signed` is forwarded to the ordered compares
// (GT / GE / LT / LE); EQ and LG do not use it.
// NOTE(review): the immediate goes through an s32 local before Imm32 even for the
// unsigned variants - confirm this is the intended extension for *_U32 opcodes.
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
    const s32 immediate = inst.control.sopk.simm;
    const IR::U32 reg_value{GetSrc(inst.dst[0])};
    const IR::U32 imm_value{ir.Imm32(immediate)};
    // Builds the comparison selected by `cond`; any other ConditionOp is unreachable.
    const auto compare = [&] {
        switch (cond) {
        case ConditionOp::EQ:
            return ir.IEqual(reg_value, imm_value);
        case ConditionOp::LG:
            return ir.INotEqual(reg_value, imm_value);
        case ConditionOp::GT:
            return ir.IGreaterThan(reg_value, imm_value, is_signed);
        case ConditionOp::GE:
            return ir.IGreaterThanEqual(reg_value, imm_value, is_signed);
        case ConditionOp::LT:
            return ir.ILessThan(reg_value, imm_value, is_signed);
        case ConditionOp::LE:
            return ir.ILessThanEqual(reg_value, imm_value, is_signed);
        default:
            UNREACHABLE();
        }
    };
    const IR::U1 scc = compare();
    ir.SetScc(scc);
}
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination

View file

@ -69,6 +69,10 @@ public:
void EmitScalarAlu(const GcnInst& inst);
void EmitVectorAlu(const GcnInst& inst);
// Instruction encodings
void EmitSOPC(const GcnInst& inst);
void EmitSOPK(const GcnInst& inst);
// Scalar ALU
void S_MOVK(const GcnInst& inst);
void S_MOV(const GcnInst& inst);
@ -98,6 +102,7 @@ public:
void S_ADDK_I32(const GcnInst& inst);
void S_MAX_U32(const GcnInst& inst);
void S_MIN_U32(const GcnInst& inst);
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -116,6 +121,7 @@ public:
void V_AND_B32(const GcnInst& inst);
void V_LSHLREV_B32(const GcnInst& inst);
void V_LSHL_B32(const GcnInst& inst);
void V_LSHL_B64(const GcnInst& inst);
void V_ADD_I32(const GcnInst& inst);
void V_ADDC_U32(const GcnInst& inst);
void V_CVT_F32_I32(const GcnInst& inst);

View file

@ -11,6 +11,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_LSHLREV_B32(inst);
case Opcode::V_LSHL_B32:
return V_LSHL_B32(inst);
case Opcode::V_LSHL_B64:
return V_LSHL_B64(inst);
case Opcode::V_BFREV_B32:
return V_BFREV_B32(inst);
case Opcode::V_BFE_U32:
@ -280,6 +282,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CMP_U32(ConditionOp::GT, true, false, inst);
case Opcode::V_CMP_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, false, inst);
case Opcode::V_CMPX_GT_I32:
return V_CMP_U32(ConditionOp::GT, true, true, inst);
case Opcode::V_CMPX_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, true, inst);
case Opcode::V_CMPX_F_U32:
@ -305,7 +309,6 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_MBCNT_U32_B32(true, inst);
case Opcode::V_MBCNT_HI_U32_B32:
return V_MBCNT_U32_B32(false, inst);
case Opcode::V_NOP:
return;
default:
@ -389,6 +392,16 @@ void Translator::V_LSHL_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}
void Translator::V_LSHL_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
const IR::U64 src1{GetSrc64(inst.src[1])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0,
"V_LSHL_B64 with non-zero src0 or src1 is not supported");
ir.SetVectorReg(dst_reg, ir.Imm32(0));
ir.SetVectorReg(dst_reg + 1, ir.Imm32(0));
}
void Translator::V_ADD_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};

View file

@ -94,6 +94,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::TBUFFER_STORE_FORMAT_X:
return BUFFER_STORE_FORMAT(1, true, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XY:
return BUFFER_STORE_FORMAT(2, true, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XYZ:
return BUFFER_STORE_FORMAT(3, true, true, inst);
@ -109,6 +111,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
// Buffer atomic operations
case Opcode::BUFFER_ATOMIC_ADD:
return BUFFER_ATOMIC(AtomicOp::Add, inst);
case Opcode::BUFFER_ATOMIC_SWAP:
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
default:
LogMissingOpcode(inst);
}
@ -474,7 +478,7 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const IR::Value original_val = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicExchange(handle, address, vdata_val, info);
return ir.BufferAtomicSwap(handle, address, vdata_val, info);
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
case AtomicOp::Smin:

View file

@ -404,9 +404,9 @@ Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, cons
return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value);
// Emits a BufferAtomicSwap32 instruction carrying `info` as its flags, with
// `handle`, `address` and `value` as arguments, and returns the instruction's value.
Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
}
void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
@ -1115,8 +1115,18 @@ U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
}
}
U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
return Inst<U32>(Opcode::BitwiseAnd32, a, b);
// Emits a bitwise AND of `a` and `b`, choosing the opcode from the operand type:
// U32 operands produce BitwiseAnd32 and U64 operands produce BitwiseAnd64.
// Both operands must report the same type.
U32U64 IREmitter::BitwiseAnd(const U32U64& a, const U32U64& b) {
// Mixed-width operands are treated as a programming error.
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
}
switch (a.Type()) {
case Type::U32:
return Inst<U32>(Opcode::BitwiseAnd32, a, b);
case Type::U64:
return Inst<U64>(Opcode::BitwiseAnd64, a, b);
default:
// Any type other than U32 / U64 is handed to ThrowInvalidType.
ThrowInvalidType(a.Type());
}
}
U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) {

View file

@ -115,8 +115,8 @@ public:
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId();
@ -195,7 +195,7 @@ public:
[[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
[[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
[[nodiscard]] U32U64 BitwiseAnd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
[[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,

View file

@ -70,7 +70,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::BufferAtomicAnd32:
case Opcode::BufferAtomicOr32:
case Opcode::BufferAtomicXor32:
case Opcode::BufferAtomicExchange32:
case Opcode::BufferAtomicSwap32:
case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64:
case Opcode::WriteSharedU32:

View file

@ -95,7 +95,7 @@ OPCODE(StoreBufferFormatF32x4, Void, Opaq
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
@ -105,7 +105,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
@ -260,6 +260,7 @@ OPCODE(ShiftRightLogical64, U64, U64,
OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
OPCODE(BitwiseAnd32, U32, U32, U32, )
OPCODE(BitwiseAnd64, U64, U64, U64, )
OPCODE(BitwiseOr32, U32, U32, U32, )
OPCODE(BitwiseOr64, U64, U64, U64, )
OPCODE(BitwiseXor32, U32, U32, U32, )

View file

@ -352,9 +352,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::BitwiseAnd32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
return;
case IR::Opcode::BitwiseAnd64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a & b; });
return;
case IR::Opcode::BitwiseOr32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
return;
case IR::Opcode::BitwiseOr64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a | b; });
return;
case IR::Opcode::BitwiseXor32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
return;

View file

@ -32,7 +32,7 @@ bool IsBufferAtomic(const IR::Inst& inst) {
case IR::Opcode::BufferAtomicAnd32:
case IR::Opcode::BufferAtomicOr32:
case IR::Opcode::BufferAtomicXor32:
case IR::Opcode::BufferAtomicExchange32:
case IR::Opcode::BufferAtomicSwap32:
return true;
default:
return false;
@ -136,6 +136,7 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
case IR::Opcode::ReadConstBufferU32:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSwap32:
return IR::Type::U32;
default:
UNREACHABLE();
@ -246,10 +247,7 @@ public:
return true;
}
// Samplers with different bindings might still be the same.
const auto old_sharp =
info.ReadUd<AmdGpu::Sampler>(existing.sgpr_base, existing.dword_offset);
const auto new_sharp = info.ReadUd<AmdGpu::Sampler>(desc.sgpr_base, desc.dword_offset);
return old_sharp == new_sharp;
return existing.GetSsharp(info) == desc.GetSsharp(info);
})};
return index;
}
@ -295,10 +293,11 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
return not_found;
}
// The bits range is for lods
// The bits range is for lods (note that constants are changed after constant propagation pass)
const auto* prod0_arg0 = prod0->Arg(0).InstRecursive();
if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract ||
prod0_arg0->Arg(1).InstRecursive()->Arg(0).U32() != 0x0008000cu) {
!(prod0_arg0->Arg(1).IsIdentity() && prod0_arg0->Arg(1).U32() == 12) ||
!(prod0_arg0->Arg(2).IsIdentity() && prod0_arg0->Arg(2).U32() == 8)) {
return not_found;
}