mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-26 20:36:16 +00:00
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implement f32 to f16 convert * shader_recompiler: Add bit instructions * shader_recompiler: More data share instructions * shader_recompiler: Remove exec contexts, fix S_MOV_B64 * shader_recompiler: Split instruction parsing into categories * shader_recompiler: Better BFS search * shader_recompiler: Constant propagation pass for cmp_class_f32 * shader_recompiler: Partial readfirstlane implementation * shader_recompiler: Stub readlane/writelane only for non-compute * hack: Fix swizzle on RDR * Will properly fix this when merging this * clang format * address_space: Bump user area size to full * shader_recompiler: V_INTERP_MOV_F32 * Should work the same as spirv will emit flat decoration on demand * kernel: Add MAP_OP_MAP_FLEXIBLE * image_view: Attempt to apply storage swizzle on format * vk_scheduler: Barrier attachments on renderpass end * clang format * liverpool: cs state backup * shader_recompiler: More instructions and formats * vector_alu: Proper V_MBCNT_U32_B32 * shader_recompiler: Port some dark souls things * file_system: Implement sceKernelRename * more formats * clang format * resource_tracking_pass: Back to assert * translate: Tracedata * kernel: Remove tracy lock * Solves random crashes in Dark Souls * code: Review comments
This commit is contained in:
parent
ac6dc20c3b
commit
a7c9bfa5c5
66 changed files with 1349 additions and 904 deletions
|
@ -2,9 +2,311 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::V_LSHLREV_B32:
|
||||
return V_LSHLREV_B32(inst);
|
||||
case Opcode::V_LSHL_B32:
|
||||
return V_LSHL_B32(inst);
|
||||
case Opcode::V_BFREV_B32:
|
||||
return V_BFREV_B32(inst);
|
||||
case Opcode::V_BFE_U32:
|
||||
return V_BFE_U32(false, inst);
|
||||
case Opcode::V_BFE_I32:
|
||||
return V_BFE_U32(true, inst);
|
||||
case Opcode::V_BFI_B32:
|
||||
return V_BFI_B32(inst);
|
||||
case Opcode::V_LSHR_B32:
|
||||
return V_LSHR_B32(inst);
|
||||
case Opcode::V_ASHRREV_I32:
|
||||
return V_ASHRREV_I32(inst);
|
||||
case Opcode::V_LSHRREV_B32:
|
||||
return V_LSHRREV_B32(inst);
|
||||
case Opcode::V_NOT_B32:
|
||||
return V_NOT_B32(inst);
|
||||
case Opcode::V_AND_B32:
|
||||
return V_AND_B32(inst);
|
||||
case Opcode::V_OR_B32:
|
||||
return V_OR_B32(false, inst);
|
||||
case Opcode::V_XOR_B32:
|
||||
return V_OR_B32(true, inst);
|
||||
case Opcode::V_FFBL_B32:
|
||||
return V_FFBL_B32(inst);
|
||||
|
||||
case Opcode::V_MOV_B32:
|
||||
return V_MOV(inst);
|
||||
case Opcode::V_ADD_I32:
|
||||
return V_ADD_I32(inst);
|
||||
case Opcode::V_ADDC_U32:
|
||||
return V_ADDC_U32(inst);
|
||||
case Opcode::V_CVT_F32_I32:
|
||||
return V_CVT_F32_I32(inst);
|
||||
case Opcode::V_CVT_F32_U32:
|
||||
return V_CVT_F32_U32(inst);
|
||||
case Opcode::V_CVT_PKRTZ_F16_F32:
|
||||
return V_CVT_PKRTZ_F16_F32(inst);
|
||||
case Opcode::V_CVT_F32_F16:
|
||||
return V_CVT_F32_F16(inst);
|
||||
case Opcode::V_CVT_F16_F32:
|
||||
return V_CVT_F16_F32(inst);
|
||||
case Opcode::V_CVT_F32_UBYTE0:
|
||||
return V_CVT_F32_UBYTE(0, inst);
|
||||
case Opcode::V_CVT_F32_UBYTE1:
|
||||
return V_CVT_F32_UBYTE(1, inst);
|
||||
case Opcode::V_CVT_F32_UBYTE2:
|
||||
return V_CVT_F32_UBYTE(2, inst);
|
||||
case Opcode::V_CVT_F32_UBYTE3:
|
||||
return V_CVT_F32_UBYTE(3, inst);
|
||||
case Opcode::V_CVT_OFF_F32_I4:
|
||||
return V_CVT_OFF_F32_I4(inst);
|
||||
case Opcode::V_MAD_U64_U32:
|
||||
return V_MAD_U64_U32(inst);
|
||||
case Opcode::V_CMP_GE_I32:
|
||||
return V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||
case Opcode::V_CMP_EQ_I32:
|
||||
return V_CMP_U32(ConditionOp::EQ, true, false, inst);
|
||||
case Opcode::V_CMP_LE_I32:
|
||||
return V_CMP_U32(ConditionOp::LE, true, false, inst);
|
||||
case Opcode::V_CMP_NE_I32:
|
||||
return V_CMP_U32(ConditionOp::LG, true, false, inst);
|
||||
case Opcode::V_CMP_NE_U32:
|
||||
return V_CMP_U32(ConditionOp::LG, false, false, inst);
|
||||
case Opcode::V_CMP_EQ_U32:
|
||||
return V_CMP_U32(ConditionOp::EQ, false, false, inst);
|
||||
case Opcode::V_CMP_F_U32:
|
||||
return V_CMP_U32(ConditionOp::F, false, false, inst);
|
||||
case Opcode::V_CMP_LT_U32:
|
||||
return V_CMP_U32(ConditionOp::LT, false, false, inst);
|
||||
case Opcode::V_CMP_GT_U32:
|
||||
return V_CMP_U32(ConditionOp::GT, false, false, inst);
|
||||
case Opcode::V_CMP_GE_U32:
|
||||
return V_CMP_U32(ConditionOp::GE, false, false, inst);
|
||||
case Opcode::V_CMP_TRU_U32:
|
||||
return V_CMP_U32(ConditionOp::TRU, false, false, inst);
|
||||
case Opcode::V_CMP_NEQ_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
case Opcode::V_CMP_F_F32:
|
||||
return V_CMP_F32(ConditionOp::F, false, inst);
|
||||
case Opcode::V_CMP_LT_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
case Opcode::V_CMP_EQ_F32:
|
||||
return V_CMP_F32(ConditionOp::EQ, false, inst);
|
||||
case Opcode::V_CMP_LE_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
case Opcode::V_CMP_GT_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
case Opcode::V_CMP_LG_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
case Opcode::V_CMP_GE_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
case Opcode::V_CMP_NLE_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
case Opcode::V_CMP_NLT_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
case Opcode::V_CMP_NGT_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
case Opcode::V_CMP_NGE_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
case Opcode::V_CMP_U_F32:
|
||||
return V_CMP_F32(ConditionOp::U, false, inst);
|
||||
case Opcode::V_CNDMASK_B32:
|
||||
return V_CNDMASK_B32(inst);
|
||||
case Opcode::V_MAX_I32:
|
||||
return V_MAX_U32(true, inst);
|
||||
case Opcode::V_MAX_U32:
|
||||
return V_MAX_U32(false, inst);
|
||||
case Opcode::V_MIN_I32:
|
||||
return V_MIN_I32(inst);
|
||||
case Opcode::V_CUBEMA_F32:
|
||||
return V_CUBEMA_F32(inst);
|
||||
case Opcode::V_CUBESC_F32:
|
||||
return V_CUBESC_F32(inst);
|
||||
case Opcode::V_CUBETC_F32:
|
||||
return V_CUBETC_F32(inst);
|
||||
case Opcode::V_CUBEID_F32:
|
||||
return V_CUBEID_F32(inst);
|
||||
case Opcode::V_CVT_U32_F32:
|
||||
return V_CVT_U32_F32(inst);
|
||||
case Opcode::V_CVT_I32_F32:
|
||||
return V_CVT_I32_F32(inst);
|
||||
case Opcode::V_CVT_FLR_I32_F32:
|
||||
return V_CVT_FLR_I32_F32(inst);
|
||||
case Opcode::V_SUBREV_I32:
|
||||
return V_SUBREV_I32(inst);
|
||||
case Opcode::V_MUL_HI_U32:
|
||||
return V_MUL_HI_U32(false, inst);
|
||||
case Opcode::V_MUL_LO_I32:
|
||||
return V_MUL_LO_U32(inst);
|
||||
case Opcode::V_SAD_U32:
|
||||
return V_SAD_U32(inst);
|
||||
case Opcode::V_SUB_I32:
|
||||
return V_SUB_I32(inst);
|
||||
case Opcode::V_MAD_I32_I24:
|
||||
return V_MAD_I32_I24(inst);
|
||||
case Opcode::V_MUL_I32_I24:
|
||||
case Opcode::V_MUL_U32_U24:
|
||||
return V_MUL_I32_I24(inst);
|
||||
case Opcode::V_MAD_U32_U24:
|
||||
return V_MAD_U32_U24(inst);
|
||||
case Opcode::V_BCNT_U32_B32:
|
||||
return V_BCNT_U32_B32(inst);
|
||||
case Opcode::V_MUL_LO_U32:
|
||||
return V_MUL_LO_U32(inst);
|
||||
case Opcode::V_MIN_U32:
|
||||
return V_MIN_U32(inst);
|
||||
case Opcode::V_CMP_NE_U64:
|
||||
return V_CMP_NE_U64(inst);
|
||||
case Opcode::V_READFIRSTLANE_B32:
|
||||
return V_READFIRSTLANE_B32(inst);
|
||||
case Opcode::V_READLANE_B32:
|
||||
return V_READLANE_B32(inst);
|
||||
case Opcode::V_WRITELANE_B32:
|
||||
return V_WRITELANE_B32(inst);
|
||||
|
||||
case Opcode::V_MAD_F32:
|
||||
return V_MAD_F32(inst);
|
||||
case Opcode::V_MAC_F32:
|
||||
return V_MAC_F32(inst);
|
||||
case Opcode::V_MUL_F32:
|
||||
return V_MUL_F32(inst);
|
||||
case Opcode::V_RCP_F32:
|
||||
return V_RCP_F32(inst);
|
||||
case Opcode::V_LDEXP_F32:
|
||||
return V_LDEXP_F32(inst);
|
||||
case Opcode::V_FRACT_F32:
|
||||
return V_FRACT_F32(inst);
|
||||
case Opcode::V_ADD_F32:
|
||||
return V_ADD_F32(inst);
|
||||
case Opcode::V_MED3_F32:
|
||||
return V_MED3_F32(inst);
|
||||
case Opcode::V_FLOOR_F32:
|
||||
return V_FLOOR_F32(inst);
|
||||
case Opcode::V_SUB_F32:
|
||||
return V_SUB_F32(inst);
|
||||
case Opcode::V_FMA_F32:
|
||||
case Opcode::V_MADAK_F32:
|
||||
return V_FMA_F32(inst);
|
||||
case Opcode::V_MAX_F32:
|
||||
return V_MAX_F32(inst);
|
||||
case Opcode::V_RSQ_F32:
|
||||
return V_RSQ_F32(inst);
|
||||
case Opcode::V_SIN_F32:
|
||||
return V_SIN_F32(inst);
|
||||
case Opcode::V_COS_F32:
|
||||
return V_COS_F32(inst);
|
||||
case Opcode::V_LOG_F32:
|
||||
return V_LOG_F32(inst);
|
||||
case Opcode::V_EXP_F32:
|
||||
return V_EXP_F32(inst);
|
||||
case Opcode::V_SQRT_F32:
|
||||
return V_SQRT_F32(inst);
|
||||
case Opcode::V_MIN_F32:
|
||||
return V_MIN_F32(inst, false);
|
||||
case Opcode::V_MIN3_F32:
|
||||
return V_MIN3_F32(inst);
|
||||
case Opcode::V_MIN3_I32:
|
||||
return V_MIN3_I32(inst);
|
||||
case Opcode::V_MIN_LEGACY_F32:
|
||||
return V_MIN_F32(inst, true);
|
||||
case Opcode::V_MADMK_F32:
|
||||
return V_MADMK_F32(inst);
|
||||
case Opcode::V_SUBREV_F32:
|
||||
return V_SUBREV_F32(inst);
|
||||
case Opcode::V_RNDNE_F32:
|
||||
return V_RNDNE_F32(inst);
|
||||
case Opcode::V_MAX3_F32:
|
||||
return V_MAX3_F32(inst);
|
||||
case Opcode::V_MAX3_U32:
|
||||
return V_MAX3_U32(inst);
|
||||
case Opcode::V_TRUNC_F32:
|
||||
return V_TRUNC_F32(inst);
|
||||
case Opcode::V_CEIL_F32:
|
||||
return V_CEIL_F32(inst);
|
||||
case Opcode::V_MUL_LEGACY_F32:
|
||||
return V_MUL_F32(inst);
|
||||
case Opcode::V_MAC_LEGACY_F32:
|
||||
return V_MAC_F32(inst);
|
||||
case Opcode::V_MAD_LEGACY_F32:
|
||||
return V_MAD_F32(inst);
|
||||
case Opcode::V_MAX_LEGACY_F32:
|
||||
return V_MAX_F32(inst, true);
|
||||
case Opcode::V_RSQ_LEGACY_F32:
|
||||
case Opcode::V_RSQ_CLAMP_F32:
|
||||
return V_RSQ_F32(inst);
|
||||
case Opcode::V_RCP_IFLAG_F32:
|
||||
return V_RCP_F32(inst);
|
||||
|
||||
case Opcode::V_CMPX_F_F32:
|
||||
return V_CMP_F32(ConditionOp::F, true, inst);
|
||||
case Opcode::V_CMPX_LT_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
case Opcode::V_CMPX_EQ_F32:
|
||||
return V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
case Opcode::V_CMPX_LE_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
case Opcode::V_CMPX_GT_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
case Opcode::V_CMPX_LG_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
case Opcode::V_CMPX_GE_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
case Opcode::V_CMPX_NGE_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
case Opcode::V_CMPX_NLG_F32:
|
||||
return V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
case Opcode::V_CMPX_NGT_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
case Opcode::V_CMPX_NLE_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
case Opcode::V_CMPX_NEQ_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
case Opcode::V_CMPX_NLT_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
case Opcode::V_CMPX_TRU_F32:
|
||||
return V_CMP_F32(ConditionOp::TRU, true, inst);
|
||||
case Opcode::V_CMP_CLASS_F32:
|
||||
return V_CMP_CLASS_F32(inst);
|
||||
|
||||
case Opcode::V_CMP_LE_U32:
|
||||
return V_CMP_U32(ConditionOp::LE, false, false, inst);
|
||||
case Opcode::V_CMP_GT_I32:
|
||||
return V_CMP_U32(ConditionOp::GT, true, false, inst);
|
||||
case Opcode::V_CMP_LT_I32:
|
||||
return V_CMP_U32(ConditionOp::LT, true, false, inst);
|
||||
case Opcode::V_CMPX_LT_I32:
|
||||
return V_CMP_U32(ConditionOp::LT, true, true, inst);
|
||||
case Opcode::V_CMPX_F_U32:
|
||||
return V_CMP_U32(ConditionOp::F, false, true, inst);
|
||||
case Opcode::V_CMPX_LT_U32:
|
||||
return V_CMP_U32(ConditionOp::LT, false, true, inst);
|
||||
case Opcode::V_CMPX_EQ_U32:
|
||||
return V_CMP_U32(ConditionOp::EQ, false, true, inst);
|
||||
case Opcode::V_CMPX_LE_U32:
|
||||
return V_CMP_U32(ConditionOp::LE, false, true, inst);
|
||||
case Opcode::V_CMPX_GT_U32:
|
||||
return V_CMP_U32(ConditionOp::GT, false, true, inst);
|
||||
case Opcode::V_CMPX_NE_U32:
|
||||
return V_CMP_U32(ConditionOp::LG, false, true, inst);
|
||||
case Opcode::V_CMPX_GE_U32:
|
||||
return V_CMP_U32(ConditionOp::GE, false, true, inst);
|
||||
case Opcode::V_CMPX_TRU_U32:
|
||||
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
|
||||
case Opcode::V_CMPX_LG_I32:
|
||||
return V_CMP_U32(ConditionOp::LG, true, true, inst);
|
||||
|
||||
case Opcode::V_MBCNT_LO_U32_B32:
|
||||
return V_MBCNT_U32_B32(true, inst);
|
||||
case Opcode::V_MBCNT_HI_U32_B32:
|
||||
return V_MBCNT_U32_B32(false, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
// V_MOV_B32: copy the first source operand into the destination unchanged.
void Translator::V_MOV(const GcnInst& inst) {
    const auto value = GetSrc(inst.src[0]);
    SetDst(inst.dst[0], value);
}
|
||||
|
@ -32,6 +334,12 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_F16_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0 = GetSrc(inst.src[0], true);
|
||||
const IR::F16 src0fp16 = ir.FPConvert(16, src0);
|
||||
SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
|
||||
}
|
||||
|
||||
void Translator::V_MUL_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
|
||||
}
|
||||
|
@ -85,6 +393,12 @@ void Translator::V_LSHLREV_B32(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
|
||||
}
|
||||
|
||||
void Translator::V_LSHL_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
|
||||
}
|
||||
|
||||
void Translator::V_ADD_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||
|
@ -208,6 +522,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
|||
return ir.FPLessThanEqual(src0, src1);
|
||||
case ConditionOp::GE:
|
||||
return ir.FPGreaterThanEqual(src0, src1);
|
||||
case ConditionOp::U:
|
||||
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -278,6 +594,13 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MIN3_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MADMK_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
|
@ -320,12 +643,13 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
|
||||
|
||||
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
|
||||
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
|
||||
const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
|
||||
|
||||
const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
|
||||
// const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
|
||||
const IR::U64 mul_result =
|
||||
ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U)));
|
||||
const IR::U64 sum_result = ir.IAdd(mul_result, src2);
|
||||
|
||||
SetDst64(inst.dst[0], sum_result);
|
||||
|
@ -463,6 +787,13 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MAX3_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
|
||||
|
@ -561,38 +892,58 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
|
||||
constexpr u32 SIGNALING_NAN = 1 << 0;
|
||||
constexpr u32 QUIET_NAN = 1 << 1;
|
||||
constexpr u32 NEGATIVE_INFINITY = 1 << 2;
|
||||
constexpr u32 NEGATIVE_NORMAL = 1 << 3;
|
||||
constexpr u32 NEGATIVE_DENORM = 1 << 4;
|
||||
constexpr u32 NEGATIVE_ZERO = 1 << 5;
|
||||
constexpr u32 POSITIVE_ZERO = 1 << 6;
|
||||
constexpr u32 POSITIVE_DENORM = 1 << 7;
|
||||
constexpr u32 POSITIVE_NORMAL = 1 << 8;
|
||||
constexpr u32 POSITIVE_INFINITY = 1 << 9;
|
||||
|
||||
const IR::F32F64 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
IR::U1 value;
|
||||
if (src1.IsImmediate()) {
|
||||
const u32 class_mask = src1.U32();
|
||||
IR::U1 value;
|
||||
if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) {
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(src1.U32());
|
||||
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
|
||||
value = ir.FPIsNan(src0);
|
||||
} else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) ==
|
||||
(POSITIVE_INFINITY | NEGATIVE_INFINITY)) {
|
||||
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
|
||||
value = ir.FPIsInf(src0);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
if (inst.dst[1].field == OperandField::VccLo) {
|
||||
return ir.SetVcc(value);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else {
|
||||
// We don't know the type yet, delay its resolution.
|
||||
value = ir.FPCmpClass32(src0, src1);
|
||||
}
|
||||
|
||||
switch (inst.dst[1].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.SetVcc(value);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::V_FFBL_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FindILsb(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 lane_id = ir.LaneId();
|
||||
|
||||
const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> {
|
||||
if (profile.subgroup_size == 32) {
|
||||
const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1));
|
||||
return std::make_pair(warp_half, lane_id);
|
||||
}
|
||||
const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5));
|
||||
const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F));
|
||||
return std::make_pair(warp_half, mask_shift);
|
||||
}();
|
||||
|
||||
const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1));
|
||||
const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0));
|
||||
const IR::U32 mask = IR::U32{ir.Select(is_odd_warp, is_low ? ir.Imm32(~0U) : thread_mask,
|
||||
is_low ? thread_mask : ir.Imm32(0))};
|
||||
const IR::U32 masked_value = ir.BitwiseAnd(src0, mask);
|
||||
const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value));
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue