video_core: Add image support

This commit is contained in:
raphaelthegreat 2024-05-27 01:07:46 +03:00
parent 729e166cd3
commit d59b102b6f
48 changed files with 1264 additions and 259 deletions

View file

@ -8,15 +8,18 @@ namespace Shader::Gcn {
void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
const IR::U32U64& address) {
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 value = handle.IsEmpty() ? ir.ReadConst(address, ir.Imm32(i))
: ir.ReadConstBuffer(handle, address, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, value);
if (handle.IsEmpty()) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i)));
} else {
const IR::U32 index = ir.IAdd(address, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index));
}
}
}
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2);
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 offset =
smrd.imm ? ir.Imm32(smrd.offset * 4)
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
@ -30,14 +33,12 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2);
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 offset =
smrd.imm ? ir.Imm32(smrd.offset * 4)
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
ir.Imm32(2))};
const IR::Value vsharp =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::ScalarReg dst_reg{inst.dst[0].code};
Load(ir, num_dwords, vsharp, dst_reg, offset);
}

View file

@ -73,8 +73,14 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
return ir.Imm32(1.f);
case OperandField::ConstFloatPos_0_5:
return ir.Imm32(0.5f);
case OperandField::ConstFloatPos_2_0:
return ir.Imm32(2.0f);
case OperandField::ConstFloatPos_4_0:
return ir.Imm32(4.0f);
case OperandField::ConstFloatNeg_0_5:
return ir.Imm32(-0.5f);
case OperandField::ConstFloatNeg_1_0:
return ir.Imm32(-1.0f);
default:
UNREACHABLE();
}
@ -135,6 +141,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_MUL_I32:
translator.S_MUL_I32(inst);
break;
case Opcode::V_MAD_F32:
translator.V_MAD_F32(inst);
break;
case Opcode::V_MOV_B32:
translator.V_MOV(inst);
break;
@ -144,12 +153,39 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MUL_F32:
translator.V_MUL_F32(inst);
break;
case Opcode::V_AND_B32:
translator.V_AND_B32(inst);
break;
case Opcode::V_LSHLREV_B32:
translator.V_LSHLREV_B32(inst);
break;
case Opcode::V_ADD_I32:
translator.V_ADD_I32(inst);
break;
case Opcode::V_CVT_F32_I32:
translator.V_CVT_F32_I32(inst);
break;
case Opcode::V_CVT_F32_U32:
translator.V_CVT_F32_U32(inst);
break;
case Opcode::S_SWAPPC_B64:
ASSERT(info.stage == Stage::Vertex);
translator.EmitFetch(inst);
break;
case Opcode::S_WAITCNT:
break; // Ignore for now.
break;
case Opcode::S_BUFFER_LOAD_DWORD:
translator.S_BUFFER_LOAD_DWORD(1, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX2:
translator.S_BUFFER_LOAD_DWORD(2, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX4:
translator.S_BUFFER_LOAD_DWORD(4, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX8:
translator.S_BUFFER_LOAD_DWORD(8, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX16:
translator.S_BUFFER_LOAD_DWORD(16, inst);
break;
@ -180,7 +216,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ENDPGM:
break;
default:
UNREACHABLE_MSG("Unknown opcode {}", u32(inst.opcode));
const u32 opcode = u32(inst.opcode);
UNREACHABLE_MSG("Unknown opcode {}", opcode);
}
}
}

View file

@ -47,6 +47,12 @@ public:
void V_MUL_F32(const GcnInst& inst);
void V_CMP_EQ_U32(const GcnInst& inst);
void V_CNDMASK_B32(const GcnInst& inst);
void V_AND_B32(const GcnInst& inst);
void V_LSHLREV_B32(const GcnInst& inst);
void V_ADD_I32(const GcnInst& inst);
void V_CVT_F32_I32(const GcnInst& inst);
void V_CVT_F32_U32(const GcnInst& inst);
void V_MAD_F32(const GcnInst& inst);
// Vector Memory
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);

View file

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn {
@ -61,4 +61,45 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
ir.SetVectorReg(dst_reg, IR::U32F32{result});
}
void Translator::V_AND_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1));
}
void Translator::V_LSHLREV_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, src0));
}
void Translator::V_ADD_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1));
// TODO: Carry
}
void Translator::V_CVT_F32_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.ConvertSToF(32, 32, src0));
}
void Translator::V_CVT_F32_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.ConvertUToF(32, 32, src0));
}
void Translator::V_MAD_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])};
const IR::F32 src1{GetSrc(inst.src[1])};
const IR::F32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
}
} // namespace Shader::Gcn

View file

@ -63,27 +63,34 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
// in coords field of the instruction. Then the resource tracking pass will patch the
// IR instruction to fill in lod_clamp field. The vector can also be used
// as coords directly as SPIR-V will ignore any extra parameters.
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++),
ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++));
// IR instruction to fill in lod_clamp field.
const IR::Value body = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
IR::TextureInstInfo info{};
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
info.force_level0.Assign(flags.test(MimgModifier::Level0));
info.explicit_lod.Assign(explicit_lod);
// Issue IR instruction, leaving unknown fields blank to patch later.
const IR::Value texel = [&]() -> IR::Value {
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
if (!flags.test(MimgModifier::Pcf)) {
if (explicit_lod) {
return ir.ImageSampleExplicitLod(handle, body, lod, offset, {});
return ir.ImageSampleExplicitLod(handle, body, lod, offset, info);
} else {
return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, {});
return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info);
}
}
if (explicit_lod) {
return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, {});
return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info);
}
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, {});
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info);
}();
for (u32 i = 0; i < 4; i++) {