Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-07-04 08:06:20 +00:00

Merge branch 'main' into shader_recompiler/format

Commit 18e95ae4c0: 138 changed files with 24513 additions and 1242 deletions
@@ -179,6 +179,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     spv::ExecutionModel execution_model{};
     ctx.AddCapability(spv::Capability::Image1D);
     ctx.AddCapability(spv::Capability::Sampled1D);
+    ctx.AddCapability(spv::Capability::ImageQuery);
     if (info.uses_fp16) {
         ctx.AddCapability(spv::Capability::Float16);
         ctx.AddCapability(spv::Capability::Int16);
@@ -102,7 +102,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor);
 }
 
-Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
 }
 
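The rename is purely at the IR level: GCN's BUFFER_ATOMIC_SWAP is still lowered to SPIR-V OpAtomicExchange, selected through a Sirit member-function pointer. Below is a standalone sketch of that dispatch pattern; the `Module` and `Id` types here are stand-ins, not the emulator's or Sirit's real API.

```cpp
#include <cstdint>
#include <iostream>

// Stand-ins for Sirit's module and id types (illustrative only).
struct Id {
    uint32_t value;
};

struct Module {
    Id OpAtomicXor(Id ptr, Id val) { return Id{ptr.value ^ val.value}; }
    Id OpAtomicExchange(Id ptr, Id val) { return Id{val.value}; }
};

// Every 32-bit buffer atomic emitter differs only in which builder method
// it invokes, so the opcode is passed as a pointer to member function,
// mirroring the &Sirit::Module::OpAtomicXor argument in the diff above.
template <typename F>
Id BufferAtomicU32(Module& module, Id ptr, Id value, F&& atomic_op) {
    return (module.*atomic_op)(ptr, value);
}

int main() {
    Module m;
    const Id r = BufferAtomicU32(m, Id{0b1010}, Id{0b0110}, &Module::OpAtomicXor);
    std::cout << r.value << '\n'; // prints 12 (0b1100)
    return 0;
}
```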
@@ -305,7 +305,7 @@ static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
     case AmdGpu::NumberFormat::Float:
         return value;
     default:
-        UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
+        UNREACHABLE_MSG("Unsupported number format for conversion: {}",
                         magic_enum::enum_name(format));
     }
 }
@@ -478,7 +478,7 @@ static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat fo
     case AmdGpu::NumberFormat::Float:
         return value;
     default:
-        UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
+        UNREACHABLE_MSG("Unsupported number format for conversion: {}",
                         magic_enum::enum_name(format));
     }
 }
@@ -91,7 +91,7 @@ Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
 Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
 Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@@ -286,6 +286,7 @@ Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
 Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
@@ -139,6 +139,13 @@ Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return result;
 }
 
+Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    const Id result{ctx.OpBitwiseAnd(ctx.U64, a, b)};
+    SetZeroFlag(ctx, inst, result);
+    SetSignFlag(ctx, inst, result);
+    return result;
+}
+
 Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
     SetZeroFlag(ctx, inst, result);
@@ -405,6 +405,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
         image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
         return spv::ImageFormat::Rg16f;
     }
+    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
+        image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm) {
+        return spv::ImageFormat::Rg16Snorm;
+    }
     if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
         image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
         return spv::ImageFormat::Rg8;
@@ -21,8 +21,13 @@ struct Compare {
     }
 };
 
-static IR::Condition MakeCondition(Opcode opcode) {
-    switch (opcode) {
+static IR::Condition MakeCondition(const GcnInst& inst) {
+    if (inst.IsCmpx()) {
+        ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
+        return IR::Condition::Execnz;
+    }
+
+    switch (inst.opcode) {
     case Opcode::S_CBRANCH_SCC0:
         return IR::Condition::Scc0;
     case Opcode::S_CBRANCH_SCC1:
@@ -37,7 +42,6 @@ static IR::Condition MakeCondition(Opcode opcode) {
         return IR::Condition::Execnz;
     case Opcode::S_AND_SAVEEXEC_B64:
     case Opcode::S_ANDN2_B64:
-    case Opcode::V_CMPX_NE_U32:
         return IR::Condition::Execnz;
     default:
         return IR::Condition::True;
@@ -94,7 +98,8 @@ void CFG::EmitDivergenceLabels() {
                // While this instruction does not save EXEC it is often used paired
                // with SAVEEXEC to mask the threads that didn't pass the condition
                // of initial branch.
-               inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32;
+               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
+               inst.opcode == Opcode::V_CMPX_NE_U32;
     };
     const auto is_close_scope = [](const GcnInst& inst) {
         // Closing an EXEC scope can be either a branch instruction
@@ -104,7 +109,8 @@ void CFG::EmitDivergenceLabels() {
         // Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
         // Those instructions need to be wrapped in the condition as well so allow branch
         // as end scope instruction.
-        inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64;
+        inst.opcode == Opcode::S_CBRANCH_EXECZ ||
+        (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
     };
 
     // Since we will be adding new labels, avoid iterating those as well.
@@ -171,7 +177,7 @@ void CFG::EmitBlocks() {
         block->begin_index = GetIndex(start);
         block->end_index = end_index;
         block->end_inst = end_inst;
-        block->cond = MakeCondition(end_inst.opcode);
+        block->cond = MakeCondition(end_inst);
         blocks.insert(*block);
     }
 }
@@ -47,4 +47,18 @@ bool GcnInst::IsConditionalBranch() const {
     return false;
 }
 
+bool GcnInst::IsCmpx() const {
+    if ((opcode >= Opcode::V_CMPX_F_F32 && opcode <= Opcode::V_CMPX_T_F32) ||
+        (opcode >= Opcode::V_CMPX_F_F64 && opcode <= Opcode::V_CMPX_T_F64) ||
+        (opcode >= Opcode::V_CMPSX_F_F32 && opcode <= Opcode::V_CMPSX_T_F32) ||
+        (opcode >= Opcode::V_CMPSX_F_F64 && opcode <= Opcode::V_CMPSX_T_F64) ||
+        (opcode >= Opcode::V_CMPX_F_I32 && opcode <= Opcode::V_CMPX_CLASS_F32) ||
+        (opcode >= Opcode::V_CMPX_F_I64 && opcode <= Opcode::V_CMPX_CLASS_F64) ||
+        (opcode >= Opcode::V_CMPX_F_U32 && opcode <= Opcode::V_CMPX_T_U32) ||
+        (opcode >= Opcode::V_CMPX_F_U64 && opcode <= Opcode::V_CMPX_T_U64)) {
+        return true;
+    }
+    return false;
+}
+
 } // namespace Shader::Gcn
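IsCmpx depends on each V_CMPX/V_CMPSX family occupying a contiguous run of enumerators. A generic sketch of that technique follows, with an illustrative enum rather than the real GCN opcode table, plus a compile-time guard documenting the layout assumption:

```cpp
// Range-based classification over a contiguous enum (illustrative values).
enum class Op : int {
    CmpF, // first compare-and-set-EXEC opcode in the family
    CmpLt,
    CmpEq,
    CmpT, // last compare-and-set-EXEC opcode in the family
    Mov,  // unrelated opcode outside the range
};

// The check is only valid while the enumerators stay contiguous; a
// static_assert enforces that assumption at compile time.
static_assert(static_cast<int>(Op::CmpT) - static_cast<int>(Op::CmpF) == 3);

constexpr bool IsCmpx(Op op) {
    return op >= Op::CmpF && op <= Op::CmpT;
}

static_assert(IsCmpx(Op::CmpEq) && !IsCmpx(Op::Mov));
```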
@@ -203,6 +203,7 @@ struct GcnInst {
     bool IsUnconditionalBranch() const;
     bool IsConditionalBranch() const;
     bool IsFork() const;
+    bool IsCmpx() const;
 };
 
 } // namespace Shader::Gcn
@@ -3,6 +3,7 @@
 
 #include <algorithm>
 #include <memory>
+#include <optional>
 #include <string>
 #include <unordered_map>
 #include <utility>
@@ -6,96 +6,150 @@
namespace Shader::Gcn {

void Translator::EmitScalarAlu(const GcnInst& inst) {
    switch (inst.encoding) {
    case InstEncoding::SOPC: {
        EmitSOPC(inst);
        break;
    }
    case InstEncoding::SOPK: {
        EmitSOPK(inst);
        break;
    }
    default:
        switch (inst.opcode) {
        case Opcode::S_MOV_B32:
            return S_MOV(inst);
        case Opcode::S_MUL_I32:
            return S_MUL_I32(inst);
        case Opcode::S_AND_SAVEEXEC_B64:
            return S_AND_SAVEEXEC_B64(inst);
        case Opcode::S_MOV_B64:
            return S_MOV_B64(inst);
        case Opcode::S_OR_B64:
            return S_OR_B64(NegateMode::None, false, inst);
        case Opcode::S_NOR_B64:
            return S_OR_B64(NegateMode::Result, false, inst);
        case Opcode::S_XOR_B64:
            return S_OR_B64(NegateMode::None, true, inst);
        case Opcode::S_ORN2_B64:
            return S_OR_B64(NegateMode::Src1, false, inst);
        case Opcode::S_AND_B64:
            return S_AND_B64(NegateMode::None, inst);
        case Opcode::S_NAND_B64:
            return S_AND_B64(NegateMode::Result, inst);
        case Opcode::S_ANDN2_B64:
            return S_AND_B64(NegateMode::Src1, inst);
        case Opcode::S_NOT_B64:
            return S_NOT_B64(inst);
        case Opcode::S_ADD_I32:
            return S_ADD_I32(inst);
        case Opcode::S_AND_B32:
            return S_AND_B32(inst);
        case Opcode::S_ASHR_I32:
            return S_ASHR_I32(inst);
        case Opcode::S_OR_B32:
            return S_OR_B32(inst);
        case Opcode::S_LSHL_B32:
            return S_LSHL_B32(inst);
        case Opcode::S_LSHR_B32:
            return S_LSHR_B32(inst);
        case Opcode::S_CSELECT_B32:
            return S_CSELECT_B32(inst);
        case Opcode::S_CSELECT_B64:
            return S_CSELECT_B64(inst);
        case Opcode::S_BFE_U32:
            return S_BFE_U32(inst);
        case Opcode::S_BFM_B32:
            return S_BFM_B32(inst);
        case Opcode::S_BREV_B32:
            return S_BREV_B32(inst);
        case Opcode::S_ADD_U32:
            return S_ADD_U32(inst);
        case Opcode::S_ADDC_U32:
            return S_ADDC_U32(inst);
        case Opcode::S_SUB_U32:
        case Opcode::S_SUB_I32:
            return S_SUB_U32(inst);
        case Opcode::S_MIN_U32:
            return S_MIN_U32(inst);
        case Opcode::S_MAX_U32:
            return S_MAX_U32(inst);
        case Opcode::S_WQM_B64:
            break;
        default:
            LogMissingOpcode(inst);
        }
        break;
    }
}

void Translator::EmitSOPC(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::S_MOVK_I32:
        return S_MOVK(inst);
    case Opcode::S_MOV_B32:
        return S_MOV(inst);
    case Opcode::S_MUL_I32:
        return S_MUL_I32(inst);
    case Opcode::S_AND_SAVEEXEC_B64:
        return S_AND_SAVEEXEC_B64(inst);
    case Opcode::S_MOV_B64:
        return S_MOV_B64(inst);
    case Opcode::S_CMP_LT_U32:
        return S_CMP(ConditionOp::LT, false, inst);
    case Opcode::S_CMP_LE_U32:
        return S_CMP(ConditionOp::LE, false, inst);
    case Opcode::S_CMP_LG_U32:
        return S_CMP(ConditionOp::LG, false, inst);
    case Opcode::S_CMP_LT_I32:
        return S_CMP(ConditionOp::LT, true, inst);
    case Opcode::S_CMP_EQ_I32:
        return S_CMP(ConditionOp::EQ, true, inst);
    case Opcode::S_CMP_LG_I32:
        return S_CMP(ConditionOp::LG, true, inst);
    case Opcode::S_CMP_GT_I32:
        return S_CMP(ConditionOp::GT, true, inst);
    case Opcode::S_CMP_LE_I32:
        return S_CMP(ConditionOp::LE, true, inst);
    case Opcode::S_CMP_GE_I32:
        return S_CMP(ConditionOp::GE, true, inst);
    case Opcode::S_CMP_EQ_I32:
        return S_CMP(ConditionOp::EQ, true, inst);
    case Opcode::S_CMP_LT_I32:
        return S_CMP(ConditionOp::LT, true, inst);
    case Opcode::S_CMP_LE_I32:
        return S_CMP(ConditionOp::LE, true, inst);

    case Opcode::S_CMP_EQ_U32:
        return S_CMP(ConditionOp::EQ, false, inst);
    case Opcode::S_CMP_GE_U32:
        return S_CMP(ConditionOp::GE, false, inst);
    case Opcode::S_CMP_LG_U32:
        return S_CMP(ConditionOp::LG, false, inst);
    case Opcode::S_CMP_GT_U32:
        return S_CMP(ConditionOp::GT, false, inst);
    case Opcode::S_OR_B64:
        return S_OR_B64(NegateMode::None, false, inst);
    case Opcode::S_NOR_B64:
        return S_OR_B64(NegateMode::Result, false, inst);
    case Opcode::S_XOR_B64:
        return S_OR_B64(NegateMode::None, true, inst);
    case Opcode::S_ORN2_B64:
        return S_OR_B64(NegateMode::Src1, false, inst);
    case Opcode::S_AND_B64:
        return S_AND_B64(NegateMode::None, inst);
    case Opcode::S_NAND_B64:
        return S_AND_B64(NegateMode::Result, inst);
    case Opcode::S_ANDN2_B64:
        return S_AND_B64(NegateMode::Src1, inst);
    case Opcode::S_NOT_B64:
        return S_NOT_B64(inst);
    case Opcode::S_ADD_I32:
        return S_ADD_I32(inst);
    case Opcode::S_AND_B32:
        return S_AND_B32(inst);
    case Opcode::S_ASHR_I32:
        return S_ASHR_I32(inst);
    case Opcode::S_OR_B32:
        return S_OR_B32(inst);
    case Opcode::S_LSHL_B32:
        return S_LSHL_B32(inst);
    case Opcode::S_LSHR_B32:
        return S_LSHR_B32(inst);
    case Opcode::S_CSELECT_B32:
        return S_CSELECT_B32(inst);
    case Opcode::S_CSELECT_B64:
        return S_CSELECT_B64(inst);
    case Opcode::S_BFE_U32:
        return S_BFE_U32(inst);
    case Opcode::S_BFM_B32:
        return S_BFM_B32(inst);
    case Opcode::S_BREV_B32:
        return S_BREV_B32(inst);
    case Opcode::S_ADD_U32:
        return S_ADD_U32(inst);
    case Opcode::S_ADDC_U32:
        return S_ADDC_U32(inst);
    case Opcode::S_CMP_GE_U32:
        return S_CMP(ConditionOp::GE, false, inst);
    case Opcode::S_CMP_LT_U32:
        return S_CMP(ConditionOp::LT, false, inst);
    case Opcode::S_CMP_LE_U32:
        return S_CMP(ConditionOp::LE, false, inst);
    default:
        LogMissingOpcode(inst);
    }
}

void Translator::EmitSOPK(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::S_MOVK_I32:
        return S_MOVK(inst);

    case Opcode::S_CMPK_EQ_I32:
        return S_CMPK(ConditionOp::EQ, true, inst);
    case Opcode::S_CMPK_LG_I32:
        return S_CMPK(ConditionOp::LG, true, inst);
    case Opcode::S_CMPK_GT_I32:
        return S_CMPK(ConditionOp::GT, true, inst);
    case Opcode::S_CMPK_GE_I32:
        return S_CMPK(ConditionOp::GE, true, inst);
    case Opcode::S_CMPK_LT_I32:
        return S_CMPK(ConditionOp::LT, true, inst);
    case Opcode::S_CMPK_LE_I32:
        return S_CMPK(ConditionOp::LE, true, inst);

    case Opcode::S_CMPK_EQ_U32:
        return S_CMPK(ConditionOp::EQ, false, inst);
    case Opcode::S_CMPK_LG_U32:
        return S_CMPK(ConditionOp::LG, false, inst);
    case Opcode::S_CMPK_GT_U32:
        return S_CMPK(ConditionOp::GT, false, inst);
    case Opcode::S_CMPK_GE_U32:
        return S_CMPK(ConditionOp::GE, false, inst);
    case Opcode::S_CMPK_LT_U32:
        return S_CMPK(ConditionOp::LT, false, inst);
    case Opcode::S_CMPK_LE_U32:
        return S_CMPK(ConditionOp::LE, false, inst);

    case Opcode::S_ADDK_I32:
        return S_ADDK_I32(inst);
    case Opcode::S_MULK_I32:
        return S_MULK_I32(inst);
    case Opcode::S_SUB_U32:
    case Opcode::S_SUB_I32:
        return S_SUB_U32(inst);
    case Opcode::S_MIN_U32:
        return S_MIN_U32(inst);
    case Opcode::S_MAX_U32:
        return S_MAX_U32(inst);
    case Opcode::S_WQM_B64:
        break;
    default:
        LogMissingOpcode(inst);
    }
@@ -152,6 +206,31 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
     ir.SetScc(result);
 }
 
+void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
+    const s32 simm16 = inst.control.sopk.simm;
+    const IR::U32 lhs = GetSrc(inst.dst[0]);
+    const IR::U32 rhs = ir.Imm32(simm16);
+    const IR::U1 result = [&] {
+        switch (cond) {
+        case ConditionOp::EQ:
+            return ir.IEqual(lhs, rhs);
+        case ConditionOp::LG:
+            return ir.INotEqual(lhs, rhs);
+        case ConditionOp::GT:
+            return ir.IGreaterThan(lhs, rhs, is_signed);
+        case ConditionOp::GE:
+            return ir.IGreaterThanEqual(lhs, rhs, is_signed);
+        case ConditionOp::LT:
+            return ir.ILessThan(lhs, rhs, is_signed);
+        case ConditionOp::LE:
+            return ir.ILessThanEqual(lhs, rhs, is_signed);
+        default:
+            UNREACHABLE();
+        }
+    }();
+    ir.SetScc(result);
+}
+
 void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
     // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
     // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
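S_CMPK compares a scalar register against the 16-bit immediate carried in the SOPK encoding word; the assignment to `s32` above is what sign-extends it before the 32-bit compare. A minimal sketch of that widening, assuming `simm` is a signed 16-bit field as the conversion implies:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical SOPK control word: the low 16 bits hold a signed immediate.
struct SopkControl {
    int16_t simm;
};

int32_t WidenSimm16(SopkControl control) {
    return control.simm; // implicit s16 -> s32 conversion sign-extends
}

int main() {
    assert(WidenSimm16({-5}) == -5);        // 0xFFFB widens to 0xFFFFFFFB
    assert(WidenSimm16({0x7FFF}) == 32767); // positive values are unchanged
    return 0;
}
```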
@@ -69,6 +69,10 @@ public:
     void EmitScalarAlu(const GcnInst& inst);
     void EmitVectorAlu(const GcnInst& inst);
 
+    // Instruction encodings
+    void EmitSOPC(const GcnInst& inst);
+    void EmitSOPK(const GcnInst& inst);
+
     // Scalar ALU
     void S_MOVK(const GcnInst& inst);
     void S_MOV(const GcnInst& inst);
@@ -98,6 +102,7 @@ public:
     void S_ADDK_I32(const GcnInst& inst);
     void S_MAX_U32(const GcnInst& inst);
     void S_MIN_U32(const GcnInst& inst);
+    void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
 
     // Scalar Memory
     void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@@ -116,6 +121,7 @@ public:
     void V_AND_B32(const GcnInst& inst);
     void V_LSHLREV_B32(const GcnInst& inst);
     void V_LSHL_B32(const GcnInst& inst);
+    void V_LSHL_B64(const GcnInst& inst);
     void V_ADD_I32(const GcnInst& inst);
     void V_ADDC_U32(const GcnInst& inst);
     void V_CVT_F32_I32(const GcnInst& inst);
@@ -11,6 +11,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_LSHLREV_B32(inst);
     case Opcode::V_LSHL_B32:
         return V_LSHL_B32(inst);
+    case Opcode::V_LSHL_B64:
+        return V_LSHL_B64(inst);
     case Opcode::V_BFREV_B32:
         return V_BFREV_B32(inst);
     case Opcode::V_BFE_U32:
@@ -280,6 +282,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_CMP_U32(ConditionOp::GT, true, false, inst);
     case Opcode::V_CMP_LT_I32:
         return V_CMP_U32(ConditionOp::LT, true, false, inst);
+    case Opcode::V_CMPX_GT_I32:
+        return V_CMP_U32(ConditionOp::GT, true, true, inst);
     case Opcode::V_CMPX_LT_I32:
         return V_CMP_U32(ConditionOp::LT, true, true, inst);
     case Opcode::V_CMPX_F_U32:
@@ -305,7 +309,6 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_MBCNT_U32_B32(true, inst);
     case Opcode::V_MBCNT_HI_U32_B32:
         return V_MBCNT_U32_B32(false, inst);
-
     case Opcode::V_NOP:
         return;
     default:
@@ -389,6 +392,16 @@ void Translator::V_LSHL_B32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
 }
 
+void Translator::V_LSHL_B64(const GcnInst& inst) {
+    const IR::U64 src0{GetSrc64(inst.src[0])};
+    const IR::U64 src1{GetSrc64(inst.src[1])};
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0,
+               "V_LSHL_B64 with non-zero src0 or src1 is not supported");
+    ir.SetVectorReg(dst_reg, ir.Imm32(0));
+    ir.SetVectorReg(dst_reg + 1, ir.Imm32(0));
+}
+
 void Translator::V_ADD_I32(const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
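The translation above deliberately accepts only the degenerate case where both sources are the immediate zero, asserting otherwise. For reference, the general operation the assert guards against is a 64-bit shift by the low six bits of the shift operand, per the GCN ISA; a sketch of those semantics, not code from this diff:

```cpp
#include <cassert>
#include <cstdint>

// V_LSHL_B64: D = S0 << S1[5:0]. GCN consumes only the low 6 bits of the
// shift amount, so amounts >= 64 wrap instead of being undefined shifts.
uint64_t LshlB64(uint64_t src0, uint64_t src1) {
    return src0 << (src1 & 0x3F);
}

int main() {
    assert(LshlB64(1, 4) == 16);
    assert(LshlB64(1, 68) == 16); // 68 & 0x3F == 4
    return 0;
}
```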
@@ -94,6 +94,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
 
     case Opcode::TBUFFER_STORE_FORMAT_X:
         return BUFFER_STORE_FORMAT(1, true, true, inst);
+    case Opcode::TBUFFER_STORE_FORMAT_XY:
+        return BUFFER_STORE_FORMAT(2, true, true, inst);
     case Opcode::TBUFFER_STORE_FORMAT_XYZ:
         return BUFFER_STORE_FORMAT(3, true, true, inst);
 
@@ -109,6 +111,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
     // Buffer atomic operations
     case Opcode::BUFFER_ATOMIC_ADD:
         return BUFFER_ATOMIC(AtomicOp::Add, inst);
+    case Opcode::BUFFER_ATOMIC_SWAP:
+        return BUFFER_ATOMIC(AtomicOp::Swap, inst);
     default:
         LogMissingOpcode(inst);
     }
@@ -474,7 +478,7 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     const IR::Value original_val = [&] {
         switch (op) {
         case AtomicOp::Swap:
-            return ir.BufferAtomicExchange(handle, address, vdata_val, info);
+            return ir.BufferAtomicSwap(handle, address, vdata_val, info);
         case AtomicOp::Add:
             return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
         case AtomicOp::Smin:
@@ -404,9 +404,9 @@ Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, cons
     return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value);
 }
 
-Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value,
-                                      BufferInstInfo info) {
-    return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value);
+Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, const Value& value,
+                                  BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
 }
 
 void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
@@ -1115,8 +1115,18 @@ U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
     }
 }
 
-U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
-    return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+U32U64 IREmitter::BitwiseAnd(const U32U64& a, const U32U64& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+    case Type::U64:
+        return Inst<U64>(Opcode::BitwiseAnd64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
 }
 
 U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) {
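The widened overload dispatches on the runtime IR type instead of relying on C++ overloading, so one entry point serves both widths. A standalone sketch of the same shape, with illustrative stand-ins rather than the emulator's Value/Type classes:

```cpp
#include <cstdint>
#include <stdexcept>
#include <variant>

// Illustrative stand-in for a typed IR value that may be 32- or 64-bit wide.
using Value = std::variant<uint32_t, uint64_t>;

// One entry point, width chosen by the runtime type, mirroring the
// BitwiseAnd32/BitwiseAnd64 split in the diff above.
Value BitwiseAnd(const Value& a, const Value& b) {
    if (a.index() != b.index()) {
        throw std::logic_error("mismatching operand widths");
    }
    if (std::holds_alternative<uint32_t>(a)) {
        return std::get<uint32_t>(a) & std::get<uint32_t>(b);
    }
    return std::get<uint64_t>(a) & std::get<uint64_t>(b);
}
```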
@@ -115,8 +115,8 @@ public:
                                  const Value& value, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address,
                                         const Value& value, BufferInstInfo info);
-    [[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address,
-                                             const Value& value, BufferInstInfo info);
+    [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
+                                         const Value& value, BufferInstInfo info);
 
     [[nodiscard]] U32 LaneId();
     [[nodiscard]] U32 WarpId();
@@ -195,7 +195,7 @@ public:
     [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
-    [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
+    [[nodiscard]] U32U64 BitwiseAnd(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
@@ -70,7 +70,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicAnd32:
     case Opcode::BufferAtomicOr32:
     case Opcode::BufferAtomicXor32:
-    case Opcode::BufferAtomicExchange32:
+    case Opcode::BufferAtomicSwap32:
     case Opcode::WriteSharedU128:
     case Opcode::WriteSharedU64:
     case Opcode::WriteSharedU32:
@@ -95,7 +95,7 @@ OPCODE(StoreBufferFormatF32x4, Void, Opaq
 OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
 
 // Buffer atomic operations
-OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32, )
 OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
@@ -105,7 +105,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq
 OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
 OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
 OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
-OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, )
+OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
 
 // Vector utility
 OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
@@ -260,6 +260,7 @@ OPCODE(ShiftRightLogical64, U64, U64,
 OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
 OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
 OPCODE(BitwiseAnd32, U32, U32, U32, )
+OPCODE(BitwiseAnd64, U64, U64, U64, )
 OPCODE(BitwiseOr32, U32, U32, U32, )
 OPCODE(BitwiseOr64, U64, U64, U64, )
 OPCODE(BitwiseXor32, U32, U32, U32, )
@@ -352,9 +352,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::BitwiseAnd32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
         return;
+    case IR::Opcode::BitwiseAnd64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a & b; });
+        return;
     case IR::Opcode::BitwiseOr32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
         return;
+    case IR::Opcode::BitwiseOr64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a | b; });
+        return;
     case IR::Opcode::BitwiseXor32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
         return;
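FoldWhenAllImmediates itself is not shown in this diff, so the following is a standalone sketch of the idea behind the new 64-bit cases, not the emulator's helper: when every operand is a known constant, the operation collapses to a single constant, otherwise the instruction is left in place.

```cpp
#include <cstdint>
#include <optional>

// Standalone model of immediate folding (illustrative only):
// std::nullopt stands for a runtime value.
template <typename T, typename F>
std::optional<T> FoldBinary(std::optional<T> a, std::optional<T> b, F op) {
    if (a && b) {
        return op(*a, *b); // all operands immediate: fold to one constant
    }
    return std::nullopt;   // otherwise the instruction must stay
}

int main() {
    const auto and64 = [](uint64_t x, uint64_t y) { return x & y; };
    const auto folded =
        FoldBinary<uint64_t>(0xFF00FF00FF00FF00ull, 0x0F0F0F0F0F0F0F0Full, and64);
    return folded.value() == 0x0F000F000F000F00ull ? 0 : 1;
}
```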
@@ -32,7 +32,7 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicAnd32:
     case IR::Opcode::BufferAtomicOr32:
     case IR::Opcode::BufferAtomicXor32:
-    case IR::Opcode::BufferAtomicExchange32:
+    case IR::Opcode::BufferAtomicSwap32:
         return true;
     default:
         return false;
@@ -136,6 +136,7 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     case IR::Opcode::ReadConstBufferU32:
     case IR::Opcode::StoreBufferU32:
     case IR::Opcode::BufferAtomicIAdd32:
+    case IR::Opcode::BufferAtomicSwap32:
         return IR::Type::U32;
     default:
         UNREACHABLE();
@@ -246,10 +247,7 @@ public:
             return true;
         }
         // Samplers with different bindings might still be the same.
-        const auto old_sharp =
-            info.ReadUd<AmdGpu::Sampler>(existing.sgpr_base, existing.dword_offset);
-        const auto new_sharp = info.ReadUd<AmdGpu::Sampler>(desc.sgpr_base, desc.dword_offset);
-        return old_sharp == new_sharp;
+        return existing.GetSsharp(info) == desc.GetSsharp(info);
     })};
     return index;
 }
@@ -295,10 +293,11 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
         return not_found;
     }
 
-    // The bits range is for lods
+    // The bits range is for lods (note that constants are changed after constant propagation pass)
     const auto* prod0_arg0 = prod0->Arg(0).InstRecursive();
     if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract ||
-        prod0_arg0->Arg(1).InstRecursive()->Arg(0).U32() != 0x0008000cu) {
+        !(prod0_arg0->Arg(1).IsIdentity() && prod0_arg0->Arg(1).U32() == 12) ||
+        !(prod0_arg0->Arg(2).IsIdentity() && prod0_arg0->Arg(2).U32() == 8)) {
         return not_found;
     }
 
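The old check matched the packed S_BFE-style operand 0x0008000c, while the new one matches the separate immediates left behind once constant propagation splits the field apart. Assuming the usual GCN S_BFE packing, offset in bits [5:0] and width in bits [22:16], the two encode the same extraction:

```cpp
#include <cstdint>

// 0x0008000c packs the same bit-field extract that the new code checks as
// two plain immediates: offset 12 (bits [5:0]) and width 8 (bits [22:16]).
constexpr uint32_t packed = 0x0008000c;
constexpr uint32_t offset = packed & 0x3F;        // = 12: lod bits start at bit 12
constexpr uint32_t width = (packed >> 16) & 0x7F; // = 8: eight lod bits

static_assert(offset == 12 && width == 8);
```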