Random fixes (#3216)

* buffer_cache: Handle inline data to flexible memory

* control_flow: Fix single instruction scopes edge case

Fixes the following pattern:

v_cmpx_gt_u32 cond
buffer_store_dword value
.LABEL:

Before
buffer[index] = value;

After
if (cond)
{
    buffer[index] = value;
}

* vector_memory: Handle soffset when offen is false

When offen is false, the offset argument can be replaced with soffset and it will be handled correctly.

* scalar_alu: Handle sharp moves with S_MOV_B64

This fixes "unable to track sharp" errors when this pattern is used in a shader.

* emulator: Add log

* video_core: Bump binary info search range and buffer num
This commit is contained in:
TheTurtle 2025-07-09 17:00:06 +03:00 committed by GitHub
parent f5336358ea
commit 7d4b875ee3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 33 additions and 9 deletions

View file

@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
LOG_INFO(Config, "GPU readbackLinearImages: {}", Config::readbackLinearImages());
LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());

View file

@ -700,7 +700,7 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
U32[4], U32[4], U32[4], U32[4], U32[4]),
U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
"AuxData")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, PushData::Step0Index, "sr0");
@ -715,6 +715,7 @@ void EmitContext::DefinePushDataBlock() {
MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
@ -727,6 +728,7 @@ void EmitContext::DefinePushDataBlock() {
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data");
interfaces.push_back(push_data_block);

View file

@ -188,14 +188,15 @@ void CFG::SplitDivergenceScopes() {
const bool is_close = is_close_scope(inst);
if ((is_close || index == blk->end_index) && curr_begin != -1) {
// If there are no instructions inside scope don't do anything.
if (index - curr_begin == 1) {
if (index - curr_begin == 1 && is_close) {
curr_begin = -1;
continue;
}
// If all instructions in the scope ignore exec masking, we shouldn't insert a
// scope.
const auto start = inst_list.begin() + curr_begin + 1;
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
if (!std::ranges::all_of(start, inst_list.begin() + index + !is_close,
IgnoresExecMask)) {
// Determine the first instruction affected by the exec mask.
do {
++curr_begin;

View file

@ -586,6 +586,15 @@ void Translator::S_MOV(const GcnInst& inst) {
}
void Translator::S_MOV_B64(const GcnInst& inst) {
// Moving SGPR to SGPR is used for thread masks, like most operations, but it can also be used
// for moving sharps.
if (inst.dst[0].field == OperandField::ScalarGPR &&
inst.src[0].field == OperandField::ScalarGPR) {
ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code),
ir.GetScalarReg(IR::ScalarReg(inst.src[0].code)));
ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code + 1),
ir.GetScalarReg(IR::ScalarReg(inst.src[0].code + 1)));
}
const IR::U1 src = [&] {
switch (inst.src[0].field) {
case OperandField::VccLo:

View file

@ -193,8 +193,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value soffset{GetSrc(inst.src[3])};
if (info.stage != Stage::Geometry) {
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
"Non immediate offset not supported");
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0 || !mubuf.offen,
"Having both scalar and vector offsets is not supported");
}
const IR::Value address = [&] -> IR::Value {
@ -204,15 +204,21 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
if (mubuf.idxen && mubuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
if (mubuf.idxen && !soffset.IsImmediate()) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
}
if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
if (!soffset.IsImmediate()) {
return soffset;
}
return {};
}();
IR::BufferInstInfo buffer_info{};
buffer_info.index_enable.Assign(mubuf.idxen);
buffer_info.offset_enable.Assign(mubuf.offen);
buffer_info.offset_enable.Assign(mubuf.offen || !soffset.IsImmediate());
buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc);

View file

@ -25,7 +25,7 @@ namespace Shader {
static constexpr size_t NumUserDataRegs = 16;
static constexpr size_t NumImages = 64;
static constexpr size_t NumBuffers = 32;
static constexpr size_t NumBuffers = 40;
static constexpr size_t NumSamplers = 16;
static constexpr size_t NumFMasks = 8;

View file

@ -603,6 +603,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
// TODO: handle proper synchronization, for now signal that update is done
// immediately
regs.cp_strmout_cntl.offset_update_done = 1;
} else if (event->event_index.Value() == EventIndex::ZpassDone) {
LOG_WARNING(Render, "Unimplemented occlusion query");
}
break;
}

View file

@ -88,7 +88,7 @@ struct Liverpool {
}
};
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) {
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) {
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
if (code[0] == token_mov_vcchi) {

View file

@ -312,7 +312,10 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
if (!is_gds) {
ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes));
if (!memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes)) {
std::memcpy(std::bit_cast<void*>(address), value, num_bytes);
return;
}
if (!IsRegionRegistered(address, num_bytes)) {
return;
}