video_core: Bloodborne stabilization pt1 (#543)

* shader_recompiler: Writelane elimination pass + null image fix

* spirv: Implement image derivatives

* texture_cache: Reduce page bit size

* clang format

* slot_vector: Back to debug assert

* vk_graphics_pipeline: Handle null tsharp

* spirv: Revert some change

* vk_instance: Support primitive restart on list topology

* page_manager: Adjust windows exception handler

* clang format

* Remove subres tracking

* Will be done separately
This commit is contained in:
TheTurtle 2024-08-24 22:51:47 +03:00 committed by GitHub
parent 9e4fc17e6c
commit c79b10edc1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 187 additions and 107 deletions

View file

@ -127,7 +127,6 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
@ -139,7 +138,6 @@ void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
@ -151,7 +149,6 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
@ -168,13 +165,18 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
}
// Translates V_READLANE_B32: emits an IR ReadLane on src[0] with the lane operand taken from
// src[1], and stores the result in the scalar register named by dst[0].
void Translator::V_READLANE_B32(const GcnInst& inst) {
    // This translation is not expected to be reached for compute shaders.
    ASSERT(info.stage != Stage::Compute);
    const IR::ScalarReg dst{inst.dst[0].code};
    const IR::U32 value{GetSrc(inst.src[0])};
    const IR::U32 lane{GetSrc(inst.src[1])};
    // The destination is written exactly once, with the lane read. A plain copy of src[0]
    // into dst beforehand would be a dead store.
    ir.SetScalarReg(dst, ir.ReadLane(value, lane));
}
// Translates V_WRITELANE_B32: emits an IR WriteLane that combines the current contents of the
// destination vector register with src[0], using src[1] as the lane operand, and writes the
// result back to the vector register named by dst[0].
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
    // This translation is not expected to be reached for compute shaders.
    ASSERT(info.stage != Stage::Compute);
    const IR::VectorReg dst{inst.dst[0].code};
    const IR::U32 value{GetSrc(inst.src[0])};
    const IR::U32 lane{GetSrc(inst.src[1])};
    // The destination must be read before anything writes to it. Storing src[0] into dst first
    // would make old_value equal to the new value and lose the previous register contents.
    const IR::U32 old_value{GetSrc(inst.dst[0])};
    ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
}
} // namespace Shader::Gcn

View file

@ -440,13 +440,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
// Translates S_GETPC_B64: writes `pc` into the scalar register named by dst[0] and zero into
// the following scalar register, forming the 64-bit destination pair.
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
    // This only really exists to let resource tracking pass know
    // there is an inline cbuf.
    const IR::ScalarReg dst{inst.dst[0].code};
    // Low dword holds the program counter value, high dword is cleared.
    ir.SetScalarReg(dst, ir.Imm32(pc));
    ir.SetScalarReg(dst + 1, ir.Imm32(0));
}
// Translates S_ADDC_U32: dst[0] = src[0] + src[1] + carry-in, where the carry-in is 1 when SCC
// is set and 0 otherwise.
void Translator::S_ADDC_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    // Materialize the SCC flag as a 32-bit 0/1 value so it can take part in the integer add.
    const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
    // NOTE(review): SCC is not updated with the carry-out of this addition here; confirm
    // whether later instructions depend on it.
    SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
}
void Translator::S_MAX_U32(const GcnInst& inst) {

View file

@ -17,6 +17,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
case Opcode::IMAGE_SAMPLE_C_LZ_O:
case Opcode::IMAGE_SAMPLE_D:
return IMAGE_SAMPLE(inst);
case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_LZ:
@ -162,12 +163,15 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
const IR::F32 dref =
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
// Derivatives are tricky because their number depends on the texture type which is located in
// T#. We don't have access to T# though until resource tracking pass. For now assume no
// derivatives are present, otherwise we don't know where coordinates are placed in the address
// stream.
ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
const IR::Value derivatives = [&] -> IR::Value {
if (!flags.test(MimgModifier::Derivative)) {
return {};
}
addr_reg = addr_reg + 4;
return ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(addr_reg - 4), ir.GetVectorReg<IR::F32>(addr_reg - 3),
ir.GetVectorReg<IR::F32>(addr_reg - 2), ir.GetVectorReg<IR::F32>(addr_reg - 1));
}();
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
@ -177,6 +181,10 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
// Derivatives are tricky because their number depends on the texture type which is located in
// T#. We don't have access to T# though until resource tracking pass. For now assume if
// derivatives are present, that a 2D image is bound.
const bool has_derivatives = flags.test(MimgModifier::Derivative);
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
IR::TextureInstInfo info{};
@ -186,9 +194,13 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
info.force_level0.Assign(flags.test(MimgModifier::Level0));
info.has_offset.Assign(flags.test(MimgModifier::Offset));
info.explicit_lod.Assign(explicit_lod);
info.has_derivatives.Assign(has_derivatives);
// Issue IR instruction, leaving unknown fields blank to patch later.
const IR::Value texel = [&]() -> IR::Value {
if (has_derivatives) {
return ir.ImageGradient(handle, body, derivatives, offset, {}, info);
}
if (!flags.test(MimgModifier::Pcf)) {
if (explicit_lod) {
return ir.ImageSampleExplicitLod(handle, body, offset, info);