Mirror of https://github.com/shadps4-emu/shadPS4.git
Synced 2025-06-26 12:26:18 +00:00
shader_recompiler: Implement AMD buffer bounds checking behavior. (#2448)
* shader_recompiler: Implement AMD buffer bounds checking behavior.
* shader_recompiler: Use SRT flatbuf for bounds check size.
* shader_recompiler: Fix buffer atomic bounds check.
* buffer_cache: Prevent false image-to-buffer sync. Lowering vertex fetch to
  formatted buffers surfaced an issue where a CPU-modified range may be
  overwritten with stale GPU-modified image data.
* Address review comments.
Parent: b06790dfe5
Commit: fd3d3c4158
19 changed files with 376 additions and 158 deletions
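Background for the change: on AMD GCN hardware, buffer accesses are bounds-checked against the num_records field of the buffer resource descriptor (V#): out-of-bounds reads return zero and out-of-bounds writes are silently dropped, rather than faulting. Games can rely on this, so the recompiler must emit equivalent checks when the host cannot guarantee the same behavior. A toy C++ model of that rule (type names and simplifications assumed, not the project's code; the real check also depends on swizzle mode and index/offset addressing):

```cpp
#include <cstdint>
#include <cstring>

// Minimal stand-in for the relevant V# descriptor fields.
struct BufferSharp {
    uint8_t* base;        // base address from the descriptor
    uint32_t stride;      // bytes per record; 0 means raw byte addressing
    uint32_t num_records; // record count (byte count when stride == 0)
};

static uint64_t BoundsInBytes(const BufferSharp& sharp) {
    return uint64_t(sharp.num_records) * (sharp.stride != 0 ? sharp.stride : 1);
}

// Out-of-bounds reads return zero instead of faulting.
uint32_t LoadDword(const BufferSharp& sharp, uint64_t byte_offset) {
    if (byte_offset + sizeof(uint32_t) > BoundsInBytes(sharp)) {
        return 0;
    }
    uint32_t value;
    std::memcpy(&value, sharp.base + byte_offset, sizeof(value));
    return value;
}

// Out-of-bounds writes are silently dropped.
void StoreDword(const BufferSharp& sharp, uint64_t byte_offset, uint32_t value) {
    if (byte_offset + sizeof(uint32_t) > BoundsInBytes(sharp)) {
        return;
    }
    std::memcpy(sharp.base + byte_offset, &value, sizeof(value));
}
```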
@@ -9,6 +9,12 @@
 namespace Shader::Gcn {
 
+const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
+    const u32* code;
+    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
+    return code;
+}
+
 /**
  * s_load_dwordx4 s[8:11], s[2:3], 0x00
  * s_load_dwordx4 s[12:15], s[2:3], 0x04
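A note on the memcpy in GetFetchShaderCode: the 64-bit pointer to the fetch-shader code is spread across two consecutive 32-bit user-data SGPRs, and memcpy reassembles it without alignment or strict-aliasing problems. A standalone sketch of the same idiom (layout assumed; little-endian 64-bit host, as on x86-64):

```cpp
#include <cstdint>
#include <cstring>

// user_data stands in for Info::user_data: 32-bit SGPR values written by the
// guest, where sgpr_base and sgpr_base + 1 hold the two halves of an address.
const uint32_t* ReadCodePointer(const uint32_t* user_data, uint32_t sgpr_base) {
    const uint32_t* code = nullptr;
    static_assert(sizeof(code) == 2 * sizeof(uint32_t), "assumes 64-bit pointers");
    std::memcpy(&code, &user_data[sgpr_base], sizeof(code));
    return code;
}
```

The unbounded end pointer in the next hunk (`code + std::numeric_limits<u32>::max()`) works because the code size is unknown up front: decoding stops at the fetch shader's terminating s_setpc_b64 rather than at a byte limit.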
@@ -38,9 +44,8 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
     if (!info.has_fetch_shader) {
         return std::nullopt;
     }
-    const u32* code;
-    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+    const auto* code = GetFetchShaderCode(info, info.fetch_shader_sgpr_base);
 
     FetchShaderData data{.code = code};
     GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
     GcnDecodeContext decoder;
@@ -64,6 +64,8 @@ struct FetchShaderData {
     }
 };
 
+const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base);
+
 std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
 
 } // namespace Shader::Gcn
@@ -4,6 +4,7 @@
 #include "common/config.h"
 #include "common/io_file.h"
 #include "common/path_util.h"
+#include "shader_recompiler/frontend/decode.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/info.h"
@@ -470,8 +471,29 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
 
 void Translator::EmitFetch(const GcnInst& inst) {
     // Read the pointer to the fetch shader assembly.
+    const auto code_sgpr_base = inst.src[0].code;
+    if (!profile.supports_robust_buffer_access) {
+        // The fetch shader must be inlined to access as regular buffers, so that
+        // bounds checks can be emitted to emulate robust buffer access.
+        const auto* code = GetFetchShaderCode(info, code_sgpr_base);
+        GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
+        GcnDecodeContext decoder;
+
+        // Decode and save instructions
+        u32 sub_pc = 0;
+        while (!slice.atEnd()) {
+            const auto sub_inst = decoder.decodeInstruction(slice);
+            if (sub_inst.opcode == Opcode::S_SETPC_B64) {
+                // Assume we're swapping back to the main shader.
+                break;
+            }
+            TranslateInstruction(sub_inst, sub_pc++);
+        }
+        return;
+    }
+
     info.has_fetch_shader = true;
-    info.fetch_shader_sgpr_base = inst.src[0].code;
+    info.fetch_shader_sgpr_base = code_sgpr_base;
 
     const auto fetch_data = ParseFetchShader(info);
     ASSERT(fetch_data.has_value());
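The `supports_robust_buffer_access` profile bit selects between the two paths above: when the host already guarantees safe out-of-bounds buffer behavior, the fetch shader is parsed and lowered as before; otherwise it is inlined so every vertex fetch goes through the recompiler's own emulated bounds checks. A sketch of how such a flag could be populated from Vulkan (wiring assumed, not necessarily shadPS4's actual code):

```cpp
#include <vulkan/vulkan.h>

// Sketch only: queries the core robustBufferAccess feature, which makes
// out-of-bounds buffer accesses return defined values instead of faulting.
bool SupportsRobustBufferAccess(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceFeatures features{};
    vkGetPhysicalDeviceFeatures(physical_device, &features);
    return features.robustBufferAccess == VK_TRUE;
}
```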
@@ -520,6 +542,40 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
     info.translation_failed = true;
 }
 
+void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
+    // Emit instructions for each category.
+    switch (inst.category) {
+    case InstCategory::DataShare:
+        EmitDataShare(inst);
+        break;
+    case InstCategory::VectorInterpolation:
+        EmitVectorInterpolation(inst);
+        break;
+    case InstCategory::ScalarMemory:
+        EmitScalarMemory(inst);
+        break;
+    case InstCategory::VectorMemory:
+        EmitVectorMemory(inst);
+        break;
+    case InstCategory::Export:
+        EmitExport(inst);
+        break;
+    case InstCategory::FlowControl:
+        EmitFlowControl(pc, inst);
+        break;
+    case InstCategory::ScalarALU:
+        EmitScalarAlu(inst);
+        break;
+    case InstCategory::VectorALU:
+        EmitVectorAlu(inst);
+        break;
+    case InstCategory::DebugProfile:
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+
 void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
                const RuntimeInfo& runtime_info, const Profile& profile) {
     if (inst_list.empty()) {
@@ -537,37 +593,7 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
             continue;
         }
 
-        // Emit instructions for each category.
-        switch (inst.category) {
-        case InstCategory::DataShare:
-            translator.EmitDataShare(inst);
-            break;
-        case InstCategory::VectorInterpolation:
-            translator.EmitVectorInterpolation(inst);
-            break;
-        case InstCategory::ScalarMemory:
-            translator.EmitScalarMemory(inst);
-            break;
-        case InstCategory::VectorMemory:
-            translator.EmitVectorMemory(inst);
-            break;
-        case InstCategory::Export:
-            translator.EmitExport(inst);
-            break;
-        case InstCategory::FlowControl:
-            translator.EmitFlowControl(pc, inst);
-            break;
-        case InstCategory::ScalarALU:
-            translator.EmitScalarAlu(inst);
-            break;
-        case InstCategory::VectorALU:
-            translator.EmitVectorAlu(inst);
-            break;
-        case InstCategory::DebugProfile:
-            break;
-        default:
-            UNREACHABLE();
-        }
+        translator.TranslateInstruction(inst, pc);
     }
 }
 
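Taken together, the two hunks above are a pure refactor: the per-category dispatch switch moves out of the Translate loop into Translator::TranslateInstruction, which is what lets EmitFetch feed decoded fetch-shader instructions through exactly the same dispatch path as the main instruction stream.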
@@ -58,6 +58,8 @@ public:
     explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
                         const Profile& profile);
 
+    void TranslateInstruction(const GcnInst& inst, u32 pc);
+
     // Instruction categories
     void EmitPrologue();
     void EmitFetch(const GcnInst& inst);
@@ -195,6 +195,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
     buffer_info.inst_offset.Assign(mubuf.offset);
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
+    buffer_info.typed.Assign(is_typed);
     if (is_typed) {
         const auto& mtbuf = inst.control.mtbuf;
         const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
@@ -241,6 +242,7 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
     buffer_info.inst_offset.Assign(mubuf.offset);
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
+    buffer_info.typed.Assign(true);
 
     const IR::Value handle =
         ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
@@ -283,6 +285,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
     buffer_info.inst_offset.Assign(mubuf.offset);
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
+    buffer_info.typed.Assign(is_typed);
     if (is_typed) {
         const auto& mtbuf = inst.control.mtbuf;
         const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
@@ -339,6 +342,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
     buffer_info.inst_offset.Assign(mubuf.offset);
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
+    buffer_info.typed.Assign(true);
 
     const IR::VectorReg src_reg{inst.src[1].code};
 
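The four hunks above thread a new typed bit through buffer_info next to the coherence flags, so later passes can distinguish typed (MTBUF) accesses, whose data and numeric formats come from the instruction, from raw (MUBUF) accesses that take their format from the buffer sharp. A hypothetical mirror of such a packed descriptor (field widths assumed; the project uses a BitField-style helper with .Assign() rather than raw C bitfields):

```cpp
#include <cstdint>

// Illustrative layout only. Each decoded buffer instruction carries this
// metadata into the IR so bounds checking and format conversion can be
// emitted without re-decoding the original GCN encoding.
struct BufferInstInfo {
    uint32_t inst_offset : 12;      // immediate byte offset from the instruction
    uint32_t globally_coherent : 1; // MUBUF/MTBUF glc bit
    uint32_t system_coherent : 1;   // MUBUF/MTBUF slc bit
    uint32_t typed : 1;             // MTBUF op: dfmt/nfmt supplied by the instruction
};
```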