Mirror of https://github.com/shadps4-emu/shadPS4.git
renderer_vulkan: Commize and adjust buffer bindings (#1412)
* shader_recompiler: Implement finite cmp class
* shader_recompiler: Implement more opcodes
* renderer_vulkan: Commonize buffer binding
* liverpool: More dma data impl
* fix
* copy_shader: Handle additional instructions from Knack
* translator: Add V_CMPX_GE_I32
parent 47ba6c6344
commit 87f8fea4de

23 changed files with 438 additions and 342 deletions
```diff
@@ -29,6 +29,14 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {
             sources[inst.dst[0].code] = inst.control.sopk.simm;
             break;
         }
+        case Gcn::Opcode::S_MOV_B32: {
+            sources[inst.dst[0].code] = inst.src[0].code;
+            break;
+        }
+        case Gcn::Opcode::S_ADDK_I32: {
+            sources[inst.dst[0].code] += inst.control.sopk.simm;
+            break;
+        }
         case Gcn::Opcode::EXP: {
             const auto& exp = inst.control.exp;
             const IR::Attribute semantic = static_cast<IR::Attribute>(exp.target);
```
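ParseCopyShader tracks, per destination SGPR, the value each scalar write would produce, so the EXP case further down can resolve export attributes back to their sources; the new S_MOV_B32 and S_ADDK_I32 cases extend which writes the scan understands. Below is a self-contained sketch of that scan pattern with simplified stand-ins for the decoder types (the real GcnInst keeps these fields inside dst/src/control structures, and the existing immediate-move case is assumed to be S_MOVK_I32):

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

// Simplified stand-ins for the GCN decoder types used by ParseCopyShader.
enum class Opcode { S_MOVK_I32, S_MOV_B32, S_ADDK_I32, EXP };

struct Inst {
    Opcode opcode;
    uint32_t dst;  // destination SGPR (inst.dst[0].code)
    uint32_t src;  // source operand code (inst.src[0].code)
    int32_t simm;  // SOPK immediate (inst.control.sopk.simm)
};

// Record the last value materialized into each SGPR, mirroring the switch in
// the diff: move-immediate sets, move stores the source operand code, and
// add-immediate accumulates on top of whatever was tracked before.
std::unordered_map<uint32_t, int32_t> ScanSources(const std::vector<Inst>& code) {
    std::unordered_map<uint32_t, int32_t> sources;
    for (const Inst& inst : code) {
        switch (inst.opcode) {
        case Opcode::S_MOVK_I32:
            sources[inst.dst] = inst.simm;
            break;
        case Opcode::S_MOV_B32:
            sources[inst.dst] = inst.src;
            break;
        case Opcode::S_ADDK_I32:
            sources[inst.dst] += inst.simm;
            break;
        default:
            break; // EXP consumes the tracked values in the real parser
        }
    }
    return sources;
}
```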
```diff
@@ -92,8 +92,12 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
         break;
     case Opcode::S_BREV_B32:
         return S_BREV_B32(inst);
+    case Opcode::S_BCNT1_I32_B64:
+        return S_BCNT1_I32_B64(inst);
     case Opcode::S_AND_SAVEEXEC_B64:
-        return S_AND_SAVEEXEC_B64(inst);
+        return S_SAVEEXEC_B64(NegateMode::None, false, inst);
+    case Opcode::S_ORN2_SAVEEXEC_B64:
+        return S_SAVEEXEC_B64(NegateMode::Src1, true, inst);
     default:
         LogMissingOpcode(inst);
     }
@@ -540,11 +544,17 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
 }
 
-void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
+void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
+    const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
+    SetDst(inst.dst[0], result);
+    ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
+}
+
+void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
     // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
     // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
     // SGPR we have a special IR opcode for SGPRs that act as thread masks.
-    const IR::U1 exec{ir.GetExec()};
+    IR::U1 exec{ir.GetExec()};
     const IR::U1 src = [&] {
         switch (inst.src[0].field) {
         case OperandField::VccLo:
@@ -568,7 +578,13 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
     }
 
     // Update EXEC.
-    const IR::U1 result = ir.LogicalAnd(exec, src);
+    if (negate == NegateMode::Src1) {
+        exec = ir.LogicalNot(exec);
+    }
+    IR::U1 result = is_or ? ir.LogicalOr(exec, src) : ir.LogicalAnd(exec, src);
+    if (negate == NegateMode::Result) {
+        result = ir.LogicalNot(result);
+    }
     ir.SetExec(result);
     ir.SetScc(result);
 }
```
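All of the S_*_SAVEEXEC_B64 variants share one shape: save the current EXEC, combine it with a source mask, and write the combined mask back to EXEC and SCC. The commonized helper captures that with an AND/OR selector plus a negation site, which is enough to express both S_AND_SAVEEXEC_B64 (NegateMode::None, false) and S_ORN2_SAVEEXEC_B64 (NegateMode::Src1, true). A boolean model of just the EXEC/SCC update follows; it is a sketch only, since the real code emits IR ops over 1-bit values rather than evaluating bools:

```cpp
#include <cstdio>

enum class NegateMode { None, Src1, Result };

// Standalone model of the flattened EXEC update in Translator::S_SAVEEXEC_B64.
// `exec` and `src` stand in for the IR::U1 values used by the translator.
bool SaveExecResult(NegateMode negate, bool is_or, bool exec, bool src) {
    if (negate == NegateMode::Src1) {
        exec = !exec; // ORN2-style variants complement the saved EXEC operand
    }
    bool result = is_or ? (exec || src) : (exec && src);
    if (negate == NegateMode::Result) {
        result = !result; // NAND/NOR-style variants complement the result
    }
    return result; // the translator writes this to both EXEC and SCC
}

int main() {
    // S_AND_SAVEEXEC_B64: EXEC' = EXEC & src
    std::printf("%d\n", SaveExecResult(NegateMode::None, false, true, true)); // 1
    // S_ORN2_SAVEEXEC_B64: EXEC' = ~EXEC | src
    std::printf("%d\n", SaveExecResult(NegateMode::Src1, true, false, false)); // 1
    return 0;
}
```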
```diff
@@ -108,8 +108,9 @@ public:
     void S_MOV_B64(const GcnInst& inst);
     void S_NOT_B64(const GcnInst& inst);
     void S_BREV_B32(const GcnInst& inst);
+    void S_BCNT1_I32_B64(const GcnInst& inst);
     void S_GETPC_B64(u32 pc, const GcnInst& inst);
-    void S_AND_SAVEEXEC_B64(const GcnInst& inst);
+    void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
 
     // SOPC
     void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
@@ -225,6 +226,7 @@ public:
     void V_MED3_I32(const GcnInst& inst);
     void V_SAD(const GcnInst& inst);
     void V_SAD_U32(const GcnInst& inst);
+    void V_CVT_PK_U16_U32(const GcnInst& inst);
     void V_CVT_PK_U8_F32(const GcnInst& inst);
     void V_LSHL_B64(const GcnInst& inst);
     void V_MUL_F64(const GcnInst& inst);
```
```diff
@@ -157,6 +157,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_RCP_F64(inst);
     case Opcode::V_RCP_IFLAG_F32:
         return V_RCP_F32(inst);
+    case Opcode::V_RCP_CLAMP_F32:
+        return V_RCP_F32(inst);
     case Opcode::V_RSQ_CLAMP_F32:
         return V_RSQ_F32(inst);
     case Opcode::V_RSQ_LEGACY_F32:
@@ -268,6 +270,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_CMP_U32(ConditionOp::GT, true, true, inst);
     case Opcode::V_CMPX_LG_I32:
         return V_CMP_U32(ConditionOp::LG, true, true, inst);
+    case Opcode::V_CMPX_GE_I32:
+        return V_CMP_U32(ConditionOp::GE, true, true, inst);
 
         // V_CMP_{OP8}_U32
     case Opcode::V_CMP_F_U32:
@@ -355,6 +359,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_MED3_I32(inst);
     case Opcode::V_SAD_U32:
         return V_SAD_U32(inst);
+    case Opcode::V_CVT_PK_U16_U32:
+        return V_CVT_PK_U16_U32(inst);
     case Opcode::V_CVT_PK_U8_F32:
         return V_CVT_PK_U8_F32(inst);
     case Opcode::V_LSHL_B64:
```
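V_CMPX_* opcodes are the EXEC-writing variants of the vector compares, which is why the new V_CMPX_GE_I32 routes to the same V_CMP_U32 helper; judging by the surrounding call sites, the two boolean arguments select a signed compare and the EXEC write, though both readings are assumptions here. A per-lane sketch of the predicate, showing why the signedness flag matters:

```cpp
#include <cstdint>
#include <cstdio>

enum class ConditionOp { LG, GE };

// Per-lane predicate behind the V_CMP/V_CMPX dispatch above. The CMPX
// forms additionally copy the result into the lane's EXEC bit.
bool CmpI32(ConditionOp op, int32_t a, int32_t b, bool is_signed) {
    switch (op) {
    case ConditionOp::LG:
        return a != b;
    case ConditionOp::GE:
        return is_signed ? a >= b
                         : static_cast<uint32_t>(a) >= static_cast<uint32_t>(b);
    }
    return false;
}

int main() {
    // Signed and unsigned GE diverge once the sign bit is set:
    std::printf("%d\n", CmpI32(ConditionOp::GE, -1, 2, true));  // 0
    std::printf("%d\n", CmpI32(ConditionOp::GE, -1, 2, false)); // 1 (0xFFFFFFFF >= 2)
    return 0;
}
```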
```diff
@@ -1108,6 +1114,14 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.IAdd(result, src2));
 }
 
+void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false);
+    const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false);
+    SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
+}
+
 void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
     const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
```
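The new V_CVT_PK_U16_U32 translation clamps each 32-bit operand to the unsigned 16-bit maximum (the IMin against 0xFFFF, with the trailing flag presumably selecting the unsigned variant) and then packs the two halves with a bitfield insert. An equivalent scalar sketch:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Scalar equivalent of the translated V_CVT_PK_U16_U32: unsigned-saturate
// both sources to 16 bits, then place src0 in the low half and src1 in the
// high half (what the BitFieldInsert at offset 16, width 16 expresses).
uint32_t CvtPkU16U32(uint32_t src0, uint32_t src1) {
    const uint32_t lo = std::min<uint32_t>(src0, 0xFFFF);
    const uint32_t hi = std::min<uint32_t>(src1, 0xFFFF);
    return lo | (hi << 16);
}

int main() {
    std::printf("0x%08X\n", CvtPkU16U32(0x12345, 0x7));   // 0x0007FFFF (src0 saturated)
    std::printf("0x%08X\n", CvtPkU16U32(0x1234, 0xABCD)); // 0xABCD1234
    return 0;
}
```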
```diff
@@ -6,6 +6,7 @@
 #include <type_traits>
 #include "common/func_traits.h"
 #include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/ir_emitter.h"
 
 namespace Shader::Optimization {
 
@@ -215,36 +216,17 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
-template <u32 idx>
-bool IsArgImm(const IR::Inst& inst, u32 imm) {
-    const IR::Value& arg = inst.Arg(idx);
-    return arg.IsImmediate() && arg.U32() == imm;
-};
-
-void FoldBooleanConvert(IR::Inst& inst) {
-    // Eliminate pattern
-    // %4 = <some bool>
-    // %5 = SelectU32 %4, #1, #0 (uses: 2)
-    // %8 = INotEqual %5, #0 (uses: 1)
-    if (!IsArgImm<1>(inst, 0)) {
-        return;
-    }
-    IR::Inst* prod = inst.Arg(0).TryInstRecursive();
-    if (!prod || prod->GetOpcode() != IR::Opcode::SelectU32) {
-        return;
-    }
-    if (IsArgImm<1>(*prod, 1) && IsArgImm<2>(*prod, 0)) {
-        inst.ReplaceUsesWith(prod->Arg(0));
-    }
-}
-
-void FoldCmpClass(IR::Inst& inst) {
+void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
     ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
     const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
     if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
         inst.ReplaceOpcode(IR::Opcode::FPIsNan32);
     } else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
         inst.ReplaceOpcode(IR::Opcode::FPIsInf32);
+    } else if ((class_mask & IR::FloatClassFunc::Finite) == IR::FloatClassFunc::Finite) {
+        IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+        const IR::F32 value = IR::F32{inst.Arg(0)};
+        inst.ReplaceUsesWith(ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsNan(value))));
     } else {
         UNREACHABLE();
     }
@@ -276,7 +258,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
         return;
     case IR::Opcode::FPCmpClass32:
-        FoldCmpClass(inst);
+        FoldCmpClass(block, inst);
         return;
     case IR::Opcode::ShiftLeftLogical32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return static_cast<u32>(a << b); });
```
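FoldCmpClass strength-reduces an FPCmpClass32 whose class mask is a compile-time constant: a NaN mask becomes FPIsNan32, an infinity mask becomes FPIsInf32, and the composite Finite mask is rewritten inline, via a block-local IREmitter, into the negation of (is-inf or is-nan). A scalar view of the three folded predicates:

```cpp
#include <cmath>
#include <cstdio>

// What each FoldCmpClass branch computes, evaluated directly on floats
// instead of being rewritten into IR opcodes.
bool NanClass(float v)    { return std::isnan(v); }          // FPIsNan32
bool InfClass(float v)    { return std::isinf(v); }          // FPIsInf32
bool FiniteClass(float v) { return !(std::isinf(v) || std::isnan(v)); }

int main() {
    std::printf("%d %d %d\n", NanClass(NAN), InfClass(INFINITY), FiniteClass(1.0f)); // 1 1 1
    std::printf("%d\n", FiniteClass(NAN)); // 0
    return 0;
}
```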
```diff
@@ -605,7 +605,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                        : IR::F32{};
     const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
 
-    const auto new_inst = [&] -> IR::Value {
+    auto new_inst = [&] -> IR::Value {
         if (inst_info.is_gather) {
             if (inst_info.is_depth) {
                 return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
```
```diff
@@ -24,6 +24,8 @@ enum class FloatClassFunc : u32 {
 
     NaN = SignalingNan | QuietNan,
     Infinity = PositiveInfinity | NegativeInfinity,
+    Finite = NegativeNormal | NegativeDenorm | NegativeZero | PositiveNormal | PositiveDenorm |
+             PositiveZero,
 };
 DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
 
```
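Because Finite is a multi-bit mask, FoldCmpClass tests it with containment, (mask & Finite) == Finite, rather than equality. A compile-checkable sketch of that idiom; the bit positions follow the usual V_CMP_CLASS_F32 layout but are an assumption here, since the diff only shows the composite members:

```cpp
#include <cstdint>

// Assumed bit assignments (the real ones live in the GCN register
// definitions); only the containment test matters for the fold.
enum class FloatClassFunc : uint32_t {
    SignalingNan = 1u << 0,
    QuietNan = 1u << 1,
    NegativeInfinity = 1u << 2,
    NegativeNormal = 1u << 3,
    NegativeDenorm = 1u << 4,
    NegativeZero = 1u << 5,
    PositiveZero = 1u << 6,
    PositiveDenorm = 1u << 7,
    PositiveNormal = 1u << 8,
    PositiveInfinity = 1u << 9,

    NaN = SignalingNan | QuietNan,
    Infinity = PositiveInfinity | NegativeInfinity,
    Finite = NegativeNormal | NegativeDenorm | NegativeZero | PositiveNormal | PositiveDenorm |
             PositiveZero,
};

constexpr bool Contains(FloatClassFunc mask, FloatClassFunc cls) {
    return (static_cast<uint32_t>(mask) & static_cast<uint32_t>(cls)) ==
           static_cast<uint32_t>(cls);
}

static_assert(Contains(FloatClassFunc::Finite, FloatClassFunc::PositiveZero));
static_assert(!Contains(FloatClassFunc::Finite, FloatClassFunc::Infinity));
static_assert(!Contains(FloatClassFunc::NaN, FloatClassFunc::Finite));
```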