Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-05-17 17:05:02 +00:00
video_core: Add image support
commit d59b102b6f (parent 729e166cd3)
48 changed files with 1264 additions and 259 deletions
@@ -8,15 +8,18 @@ namespace Shader::Gcn {
 void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
           const IR::U32U64& address) {
     for (u32 i = 0; i < num_dwords; i++) {
-        const IR::U32 value = handle.IsEmpty() ? ir.ReadConst(address, ir.Imm32(i))
-                                               : ir.ReadConstBuffer(handle, address, ir.Imm32(i));
-        ir.SetScalarReg(dst_reg++, value);
+        if (handle.IsEmpty()) {
+            ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i)));
+        } else {
+            const IR::U32 index = ir.IAdd(address, ir.Imm32(i));
+            ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index));
+        }
     }
 }

 void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     const auto& smrd = inst.control.smrd;
-    const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2);
+    const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::U32 offset =
         smrd.imm ? ir.Imm32(smrd.offset * 4)
                  : IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
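The reworked Load helper walks num_dwords consecutive scalar destination registers: with an empty handle it reads straight through ReadConst, otherwise it indexes the bound constant buffer at address + i through ReadConstBuffer. A minimal standalone sketch of that walk, using plain arrays in place of the emulator's IR types (FakeBuffer, LoadDwords and the register array are illustrative names, not shadPS4 API):

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative stand-in for the buffer handle; a null pointer models handle.IsEmpty().
struct FakeBuffer {
    const uint32_t* data = nullptr;
};

// Model of Load(): copy num_dwords dwords into consecutive "scalar registers".
void LoadDwords(std::vector<uint32_t>& sgpr, int dst_reg, int num_dwords,
                const FakeBuffer& handle, const uint32_t* memory, uint32_t address) {
    for (int i = 0; i < num_dwords; i++) {
        if (handle.data == nullptr) {
            sgpr[dst_reg++] = memory[address + i];      // ReadConst path
        } else {
            sgpr[dst_reg++] = handle.data[address + i]; // ReadConstBuffer path, index = address + i
        }
    }
}

int main() {
    const uint32_t memory[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    std::vector<uint32_t> sgpr(16, 0);
    LoadDwords(sgpr, /*dst_reg=*/4, /*num_dwords=*/4, FakeBuffer{}, memory, /*address=*/2);
    for (int r = 4; r < 8; r++) {
        std::printf("s%d = %u\n", r, sgpr[r]); // prints 12 13 14 15
    }
}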
@@ -30,14 +33,12 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {

 void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     const auto& smrd = inst.control.smrd;
-    const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2);
+    const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::U32 offset =
         smrd.imm ? ir.Imm32(smrd.offset * 4)
                  : IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
                                                ir.Imm32(2))};
-    const IR::Value vsharp =
-        ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
-                              ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
+    const IR::Value vsharp = ir.GetScalarReg(sbase);
     const IR::ScalarReg dst_reg{inst.dst[0].code};
     Load(ir, num_dwords, vsharp, dst_reg, offset);
 }
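Both scalar load paths compute the SMRD offset the same way: when the imm bit is set, the encoded offset field is scaled by four directly; otherwise the field names a scalar register whose value is shifted left by two (the same scale). A tiny standalone model of that selection, with invented names rather than shadPS4's types:

#include <cstdint>
#include <cstdio>

// Illustrative model of the SMRD offset selection shown above.
uint32_t SmrdOffset(bool imm, uint32_t offset_field, const uint32_t* sgpr) {
    return imm ? offset_field * 4u : sgpr[offset_field] << 2u;
}

int main() {
    uint32_t sgpr[16] = {};
    sgpr[5] = 7;
    std::printf("%u %u\n", SmrdOffset(true, 3, sgpr), SmrdOffset(false, 5, sgpr)); // 12 28
}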
@@ -73,8 +73,14 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
        return ir.Imm32(1.f);
    case OperandField::ConstFloatPos_0_5:
        return ir.Imm32(0.5f);
    case OperandField::ConstFloatPos_2_0:
        return ir.Imm32(2.0f);
    case OperandField::ConstFloatPos_4_0:
        return ir.Imm32(4.0f);
    case OperandField::ConstFloatNeg_0_5:
        return ir.Imm32(-0.5f);
    case OperandField::ConstFloatNeg_1_0:
        return ir.Imm32(-1.0f);
    default:
        UNREACHABLE();
    }
@@ -135,6 +141,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
     case Opcode::S_MUL_I32:
         translator.S_MUL_I32(inst);
         break;
+    case Opcode::V_MAD_F32:
+        translator.V_MAD_F32(inst);
+        break;
     case Opcode::V_MOV_B32:
         translator.V_MOV(inst);
         break;
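V_MAD_F32 is a multiply-add (src0 * src1 + src2); the translator lowers it to a fused multiply-add IR op (FPFma, see the vector ALU hunk further down). A quick standalone illustration of that semantic, not the emulator's code:

#include <cmath>
#include <cstdio>

// V_MAD_F32 semantics: dst = src0 * src1 + src2, expressed here as a fused multiply-add.
float MadF32(float src0, float src1, float src2) {
    return std::fma(src0, src1, src2);
}

int main() {
    std::printf("%g\n", MadF32(2.0f, 3.0f, 0.5f)); // 6.5
}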
@@ -144,12 +153,39 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
    case Opcode::V_MUL_F32:
        translator.V_MUL_F32(inst);
        break;
    case Opcode::V_AND_B32:
        translator.V_AND_B32(inst);
        break;
    case Opcode::V_LSHLREV_B32:
        translator.V_LSHLREV_B32(inst);
        break;
    case Opcode::V_ADD_I32:
        translator.V_ADD_I32(inst);
        break;
    case Opcode::V_CVT_F32_I32:
        translator.V_CVT_F32_I32(inst);
        break;
    case Opcode::V_CVT_F32_U32:
        translator.V_CVT_F32_U32(inst);
        break;
    case Opcode::S_SWAPPC_B64:
        ASSERT(info.stage == Stage::Vertex);
        translator.EmitFetch(inst);
        break;
    case Opcode::S_WAITCNT:
        break; // Ignore for now.
        break;
    case Opcode::S_BUFFER_LOAD_DWORD:
        translator.S_BUFFER_LOAD_DWORD(1, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX2:
        translator.S_BUFFER_LOAD_DWORD(2, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX4:
        translator.S_BUFFER_LOAD_DWORD(4, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX8:
        translator.S_BUFFER_LOAD_DWORD(8, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX16:
        translator.S_BUFFER_LOAD_DWORD(16, inst);
        break;
@@ -180,7 +216,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
     case Opcode::S_ENDPGM:
         break;
     default:
-        UNREACHABLE_MSG("Unknown opcode {}", u32(inst.opcode));
+        const u32 opcode = u32(inst.opcode);
+        UNREACHABLE_MSG("Unknown opcode {}", opcode);
         }
     }
 }
@@ -47,6 +47,12 @@
     void V_MUL_F32(const GcnInst& inst);
     void V_CMP_EQ_U32(const GcnInst& inst);
     void V_CNDMASK_B32(const GcnInst& inst);
+    void V_AND_B32(const GcnInst& inst);
+    void V_LSHLREV_B32(const GcnInst& inst);
+    void V_ADD_I32(const GcnInst& inst);
+    void V_CVT_F32_I32(const GcnInst& inst);
+    void V_CVT_F32_U32(const GcnInst& inst);
+    void V_MAD_F32(const GcnInst& inst);

     // Vector Memory
     void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
@@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off

#include "shader_recompiler/frontend/translate/translate.h"

namespace Shader::Gcn {
@@ -61,4 +61,45 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
     ir.SetVectorReg(dst_reg, IR::U32F32{result});
 }

+void Translator::V_AND_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1));
+}
+
+void Translator::V_LSHLREV_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, src0));
+}
+
+void Translator::V_ADD_I32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1));
+    // TODO: Carry
+}
+
+void Translator::V_CVT_F32_I32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, ir.ConvertSToF(32, 32, src0));
+}
+
+void Translator::V_CVT_F32_U32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, ir.ConvertUToF(32, 32, src0));
+}
+
+void Translator::V_MAD_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0])};
+    const IR::F32 src1{GetSrc(inst.src[1])};
+    const IR::F32 src2{GetSrc(inst.src[2])};
+    SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
+}
+
 } // namespace Shader::Gcn
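Note the operand order in V_LSHLREV_B32 above: the REV suffix means the shift amount comes from src0 and the value being shifted from src1, hence ShiftLeftLogical(src1, src0). A standalone sketch of just that semantic, using plain integers rather than the IR types (the 5-bit masking of the shift amount is my own note on GCN behaviour, not something the diff shows):

#include <cstdint>
#include <cstdio>

// V_LSHLREV_B32: dst = src1 << src0 (operands reversed relative to a plain LSHL).
uint32_t LshlRevB32(uint32_t src0, uint32_t src1) {
    return src1 << (src0 & 31u); // GCN uses only the low 5 bits of the shift amount
}

int main() {
    std::printf("0x%x\n", LshlRevB32(/*shift=*/4, /*value=*/0x3)); // 0x30
}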
@@ -63,27 +63,34 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
     // Since these are at most 4 dwords, we load them into a single uvec4 and place them
     // in coords field of the instruction. Then the resource tracking pass will patch the
-    // IR instruction to fill in lod_clamp field. The vector can also be used
-    // as coords directly as SPIR-V will ignore any extra parameters.
-    const IR::Value body =
-        ir.CompositeConstruct(ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++),
-                              ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++));
+    // IR instruction to fill in lod_clamp field.
+    const IR::Value body = ir.CompositeConstruct(
+        ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
+        ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));

+    const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
+
+    IR::TextureInstInfo info{};
+    info.is_depth.Assign(flags.test(MimgModifier::Pcf));
+    info.has_bias.Assign(flags.test(MimgModifier::LodBias));
+    info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
+    info.force_level0.Assign(flags.test(MimgModifier::Level0));
+    info.explicit_lod.Assign(explicit_lod);
+
     // Issue IR instruction, leaving unknown fields blank to patch later.
     const IR::Value texel = [&]() -> IR::Value {
         const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
-        const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
         if (!flags.test(MimgModifier::Pcf)) {
             if (explicit_lod) {
-                return ir.ImageSampleExplicitLod(handle, body, lod, offset, {});
+                return ir.ImageSampleExplicitLod(handle, body, lod, offset, info);
             } else {
-                return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, {});
+                return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info);
             }
         }
         if (explicit_lod) {
-            return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, {});
+            return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info);
         }
-        return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, {});
+        return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info);
     }();

     for (u32 i = 0; i < 4; i++) {
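The MIMG modifier flags steer both the new TextureInstInfo bits and which of the four sample variants is emitted: depth compare (Pcf) selects the Dref forms, while Level0/Lod select the explicit-LOD forms. A small standalone sketch of that selection logic, with invented flag and enum names purely for illustration:

#include <cstdio>

// Illustrative modifier flags mirroring the decision points above (not shadPS4's types).
struct Modifiers {
    bool pcf = false;    // depth compare (dref)
    bool level0 = false; // force LOD 0
    bool lod = false;    // explicit LOD supplied
};

enum class SampleKind { ImplicitLod, ExplicitLod, DrefImplicitLod, DrefExplicitLod };

// Same branching shape as the lambda in IMAGE_SAMPLE.
SampleKind Select(const Modifiers& m) {
    const bool explicit_lod = m.level0 || m.lod;
    if (!m.pcf) {
        return explicit_lod ? SampleKind::ExplicitLod : SampleKind::ImplicitLod;
    }
    return explicit_lod ? SampleKind::DrefExplicitLod : SampleKind::DrefImplicitLod;
}

int main() {
    Modifiers m;
    m.pcf = true;
    m.level0 = true;
    std::printf("%d\n", static_cast<int>(Select(m))); // 3 == DrefExplicitLod
}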