mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-04 08:06:20 +00:00
shader_recompiler: Implement data share append and consume operations (#814)
* shader_recompiler: Add more format swap modes * texture_cache: Handle stencil texture reads * emulator: Support loading font library * readme: Add thanks section * shader_recompiler: Constant buffers as integers * shader_recompiler: Typed buffers as integers * shader_recompiler: Separate thread bit scalars * We can assume guest shader never mixes them with normal sgprs. This helps avoid errors where ssa could view an sgpr write dominating a thread bit read, due to how control flow is structurized, even though its not possible in actual control flow * shader_recompiler: Implement data append/consume operations * clang format * buffer_cache: Simplify invalidation scheme * video_core: Remove some invalidation remnants * adjust
This commit is contained in:
parent
649527a235
commit
13743b27fc
34 changed files with 512 additions and 272 deletions
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
|
@ -42,11 +41,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
|
|||
|
||||
bool IsBufferStore(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4:
|
||||
return true;
|
||||
default:
|
||||
return IsBufferAtomic(inst);
|
||||
|
@ -55,25 +53,28 @@ bool IsBufferStore(const IR::Inst& inst) {
|
|||
|
||||
bool IsBufferInstruction(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferF32:
|
||||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::LoadBufferU32x2:
|
||||
case IR::Opcode::LoadBufferU32x3:
|
||||
case IR::Opcode::LoadBufferU32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return IsBufferStore(inst);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsDataRingInstruction(const IR::Inst& inst) {
|
||||
return inst.GetOpcode() == IR::Opcode::DataAppend ||
|
||||
inst.GetOpcode() == IR::Opcode::DataConsume;
|
||||
}
|
||||
|
||||
bool IsTextureBufferInstruction(const IR::Inst& inst) {
|
||||
return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
|
||||
inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
|
||||
}
|
||||
|
||||
static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||
bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||
switch (num_format) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
switch (data_format) {
|
||||
|
@ -98,19 +99,15 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for
|
|||
|
||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferF32:
|
||||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
return IR::Type::F32;
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::LoadBufferU32x2:
|
||||
case IR::Opcode::LoadBufferU32x3:
|
||||
case IR::Opcode::LoadBufferU32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::BufferAtomicIAdd32:
|
||||
case IR::Opcode::BufferAtomicSwap32:
|
||||
return IR::Type::U32;
|
||||
|
@ -191,6 +188,10 @@ public:
|
|||
|
||||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
// Only one GDS binding can exist.
|
||||
if (desc.is_gds_buffer && existing.is_gds_buffer) {
|
||||
return true;
|
||||
}
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset &&
|
||||
desc.inline_cbuf == existing.inline_cbuf;
|
||||
|
@ -399,8 +400,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emittion time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -609,6 +609,51 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
|
@ -625,6 +670,10 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingInstruction(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue