mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-18 17:34:52 +00:00
video_core: Bloodborne stabilization pt1 (#543)
* shader_recompiler: Writelane elimination pass + null image fix
* spirv: Implement image derivatives
* texture_cache: Reduce page bit size
* clang format
* slot_vector: Back to debug assert
* vk_graphics_pipeline: Handle null tsharp
* spirv: Revert some change
* vk_instance: Support primitive restart on list topology
* page_manager: Adjust windows exception handler
* clang format
* Remove subres tracking
* Will be done separately
This commit is contained in:
parent
9e4fc17e6c
commit
c79b10edc1
25 changed files with 187 additions and 107 deletions
|
@ -127,7 +127,6 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
|||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
|
@ -139,7 +138,6 @@ void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
|
|||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
|
@ -151,7 +149,6 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
|
|||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
|
@ -168,13 +165,18 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_READLANE_B32(const GcnInst& inst) {
|
||||
ASSERT(info.stage != Stage::Compute);
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
const IR::ScalarReg dst{inst.dst[0].code};
|
||||
const IR::U32 value{GetSrc(inst.src[0])};
|
||||
const IR::U32 lane{GetSrc(inst.src[1])};
|
||||
ir.SetScalarReg(dst, ir.ReadLane(value, lane));
|
||||
}
|
||||
|
||||
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
||||
ASSERT(info.stage != Stage::Compute);
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
const IR::VectorReg dst{inst.dst[0].code};
|
||||
const IR::U32 value{GetSrc(inst.src[0])};
|
||||
const IR::U32 lane{GetSrc(inst.src[1])};
|
||||
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
||||
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -440,13 +440,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
|
|||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
SetDst(inst.dst[0], ir.Imm32(pc));
|
||||
const IR::ScalarReg dst{inst.dst[0].code};
|
||||
ir.SetScalarReg(dst, ir.Imm32(pc));
|
||||
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||
}
|
||||
|
||||
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
|
||||
const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
|
||||
}
|
||||
|
||||
void Translator::S_MAX_U32(const GcnInst& inst) {
|
||||
|
|
|
@ -17,6 +17,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
case Opcode::IMAGE_SAMPLE_C_O:
|
||||
case Opcode::IMAGE_SAMPLE_B:
|
||||
case Opcode::IMAGE_SAMPLE_C_LZ_O:
|
||||
case Opcode::IMAGE_SAMPLE_D:
|
||||
return IMAGE_SAMPLE(inst);
|
||||
case Opcode::IMAGE_GATHER4_C:
|
||||
case Opcode::IMAGE_GATHER4_LZ:
|
||||
|
@ -162,12 +163,15 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||
const IR::F32 dref =
|
||||
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||
|
||||
// Derivatives are tricky because their number depends on the texture type which is located in
|
||||
// T#. We don't have access to T# though until resource tracking pass. For now assume no
|
||||
// derivatives are present, otherwise we don't know where coordinates are placed in the address
|
||||
// stream.
|
||||
ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
|
||||
const IR::Value derivatives = [&] -> IR::Value {
|
||||
if (!flags.test(MimgModifier::Derivative)) {
|
||||
return {};
|
||||
}
|
||||
addr_reg = addr_reg + 4;
|
||||
return ir.CompositeConstruct(
|
||||
ir.GetVectorReg<IR::F32>(addr_reg - 4), ir.GetVectorReg<IR::F32>(addr_reg - 3),
|
||||
ir.GetVectorReg<IR::F32>(addr_reg - 2), ir.GetVectorReg<IR::F32>(addr_reg - 1));
|
||||
}();
|
||||
|
||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
||||
|
@ -177,6 +181,10 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||
|
||||
// Derivatives are tricky because their number depends on the texture type which is located in
|
||||
// T#. We don't have access to T# though until resource tracking pass. For now assume if
|
||||
// derivatives are present, that a 2D image is bound.
|
||||
const bool has_derivatives = flags.test(MimgModifier::Derivative);
|
||||
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
|
@ -186,9 +194,13 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
info.explicit_lod.Assign(explicit_lod);
|
||||
info.has_derivatives.Assign(has_derivatives);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
if (has_derivatives) {
|
||||
return ir.ImageGradient(handle, body, derivatives, offset, {}, info);
|
||||
}
|
||||
if (!flags.test(MimgModifier::Pcf)) {
|
||||
if (explicit_lod) {
|
||||
return ir.ImageSampleExplicitLod(handle, body, offset, info);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue