From 7d4b875ee33c47abc4edaa5a2ddacfbb7d32f9d8 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Wed, 9 Jul 2025 17:00:06 +0300
Subject: [PATCH] Random fixes (#3216)

* buffer_cache: Handle inline data to flexible memory

* control_flow: Fix single instruction scopes edge case

  Fixes the following pattern:

      v_cmpx_gt_u32 cond
      buffer_store_dword value
  .LABEL:

  Before:

      buffer[index] = value;

  After:

      if (cond) {
          buffer[index] = value;
      }

* vector_memory: Handle soffset when offen is false

  When offen is not used, we can substitute the offset argument with soffset
  and have it handled correctly.

* scalar_alu: Handle sharp moves with S_MOV_B64

  This fixes "unable to track sharp" errors when this pattern is used in a
  shader.

* emulator: Add log

* video_core: Bump binary info search range and buffer num
---
 src/emulator.cpp                              |  1 +
 .../backend/spirv/spirv_emit_context.cpp      |  4 +++-
 .../frontend/control_flow_graph.cpp           |  5 +++--
 .../frontend/translate/scalar_alu.cpp         |  9 +++++++++
 .../frontend/translate/vector_memory.cpp      | 12 +++++++++---
 src/shader_recompiler/info.h                  |  2 +-
 src/video_core/amdgpu/liverpool.cpp           |  2 ++
 src/video_core/amdgpu/liverpool.h             |  2 +-
 src/video_core/buffer_cache/buffer_cache.cpp  |  5 ++++-
 9 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/src/emulator.cpp b/src/emulator.cpp
index 332287d22..480ceee0b 100644
--- a/src/emulator.cpp
+++ b/src/emulator.cpp
@@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector ar
     LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
     LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
     LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
+    LOG_INFO(Config, "GPU readbackLinearImages: {}", Config::readbackLinearImages());
     LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
     LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
     LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 77336c9ec..fe489f1b6 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -700,7 +700,7 @@ void EmitContext::DefineOutputs() {
 void EmitContext::DefinePushDataBlock() {
     // Create push constants block for instance steps rates
     const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
-                                         U32[4], U32[4], U32[4], U32[4], U32[4]),
+                                         U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
                               "AuxData")};
     Decorate(struct_type, spv::Decoration::Block);
     MemberName(struct_type, PushData::Step0Index, "sr0");
@@ -715,6 +715,7 @@ void EmitContext::DefinePushDataBlock() {
     MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
     MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
     MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
+    MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
     MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
     MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
     MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
@@ -727,6 +728,7 @@ void EmitContext::DefinePushDataBlock() {
     MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
     MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
     MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
+    MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
     push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
     Name(push_data_block, "push_data");
     interfaces.push_back(push_data_block);
diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp
index b53db9e94..805fdb108 100644
--- a/src/shader_recompiler/frontend/control_flow_graph.cpp
+++ b/src/shader_recompiler/frontend/control_flow_graph.cpp
@@ -188,14 +188,15 @@ void CFG::SplitDivergenceScopes() {
             const bool is_close = is_close_scope(inst);
             if ((is_close || index == blk->end_index) && curr_begin != -1) {
                 // If there are no instructions inside scope don't do anything.
-                if (index - curr_begin == 1) {
+                if (index - curr_begin == 1 && is_close) {
                     curr_begin = -1;
                     continue;
                 }
                 // If all instructions in the scope ignore exec masking, we shouldn't insert a
                 // scope.
                 const auto start = inst_list.begin() + curr_begin + 1;
-                if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
+                if (!std::ranges::all_of(start, inst_list.begin() + index + !is_close,
+                                         IgnoresExecMask)) {
                     // Determine the first instruction affected by the exec mask.
                     do {
                         ++curr_begin;
diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
index 48f977f49..276b55567 100644
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@@ -586,6 +586,15 @@ void Translator::S_MOV(const GcnInst& inst) {
 }
 
 void Translator::S_MOV_B64(const GcnInst& inst) {
+    // Moving SGPR to SGPR is used for thread masks, like most operations, but it can also be used
+    // for moving sharps.
+    if (inst.dst[0].field == OperandField::ScalarGPR &&
+        inst.src[0].field == OperandField::ScalarGPR) {
+        ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code),
+                        ir.GetScalarReg(IR::ScalarReg(inst.src[0].code)));
+        ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code + 1),
+                        ir.GetScalarReg(IR::ScalarReg(inst.src[0].code + 1)));
+    }
     const IR::U1 src = [&] {
         switch (inst.src[0].field) {
         case OperandField::VccLo:
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 91f545cfd..68b619c0a 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -193,8 +193,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
     const IR::ScalarReg sharp{inst.src[2].code * 4};
     const IR::Value soffset{GetSrc(inst.src[3])};
     if (info.stage != Stage::Geometry) {
-        ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
-                   "Non immediate offset not supported");
+        ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0 || !mubuf.offen,
+                   "Having both scalar and vector offsets is not supported");
     }
 
     const IR::Value address = [&] -> IR::Value {
@@ -204,15 +204,21 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
         if (mubuf.idxen && mubuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
         }
+        if (mubuf.idxen && !soffset.IsImmediate()) {
+            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
+        }
         if (mubuf.idxen || mubuf.offen) {
             return ir.GetVectorReg(vaddr);
         }
+        if (!soffset.IsImmediate()) {
+            return soffset;
+        }
         return {};
     }();
 
     IR::BufferInstInfo buffer_info{};
     buffer_info.index_enable.Assign(mubuf.idxen);
-    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.offset_enable.Assign(mubuf.offen || !soffset.IsImmediate());
     buffer_info.inst_offset.Assign(mubuf.offset);
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 72977b711..9703643e8 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -25,7 +25,7 @@ namespace Shader {
 
 static constexpr size_t NumUserDataRegs = 16;
 static constexpr size_t NumImages = 64;
-static constexpr size_t NumBuffers = 32;
+static constexpr size_t NumBuffers = 40;
 static constexpr size_t NumSamplers = 16;
 static constexpr size_t NumFMasks = 8;
 
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index e264de74a..3e66fba6a 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -603,6 +603,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span
+            } else if (event->event_index.Value() == EventIndex::ZpassDone) {
+                LOG_WARNING(Render, "Unimplemented occlusion query");
             }
             break;
         }
diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h
index 0613823ab..c07e9f63a 100644
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@@ -88,7 +88,7 @@ struct Liverpool {
         }
     };
 
-    static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) {
+    static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) {
         constexpr u32 token_mov_vcchi = 0xBEEB03FF;
 
         if (code[0] == token_mov_vcchi) {
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 8a7e99ea0..c1110e54d 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -312,7 +312,10 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
 void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
     ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
     if (!is_gds) {
-        ASSERT(memory->TryWriteBacking(std::bit_cast(address), value, num_bytes));
+        if (!memory->TryWriteBacking(std::bit_cast(address), value, num_bytes)) {
+            std::memcpy(std::bit_cast(address), value, num_bytes);
+            return;
+        }
         if (!IsRegionRegistered(address, num_bytes)) {
             return;
         }
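
The scalar_alu change is easiest to picture with a small standalone model. The sketch
below is illustrative only and is not shadPS4 code: the ScalarFile type, the register
count, and the descriptor words are invented. It shows why an SGPR-to-SGPR S_MOV_B64
has to be modeled as two 32-bit copies, so tracking that later inspects the destination
pair still sees the sharp that was moved.

    // Minimal standalone sketch (not shadPS4 code).
    #include <array>
    #include <cstdint>
    #include <cstdio>

    struct ScalarFile {
        std::array<uint32_t, 104> sgpr{}; // hypothetical 32-bit SGPR file

        // S_MOV_B64 sdst, ssrc modeled as two dword copies: (dst, dst+1) <- (src, src+1).
        void MoveB64(int dst, int src) {
            sgpr[dst] = sgpr[src];
            sgpr[dst + 1] = sgpr[src + 1];
        }
    };

    int main() {
        ScalarFile s;
        // Pretend s[4:5] holds half of a buffer sharp (V#); the values are made up.
        s.sgpr[4] = 0xDEADBEEF; // base address bits
        s.sgpr[5] = 0x00220000; // stride / num_records bits

        s.MoveB64(12, 4); // shader copies the pair before issuing a buffer load

        // Tracking that follows s[12:13] now sees the same descriptor words.
        std::printf("s[12]=%08X s[13]=%08X\n", s.sgpr[12], s.sgpr[13]);
    }

If only the low dword were copied, half of the descriptor would be lost and the sharp
could no longer be resolved from the destination pair.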
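
The vector_memory rule in isolation: when offen is clear but soffset is not a known zero
immediate, the scalar offset takes the place of the vector offset component of the
address. A minimal sketch of that selection, assuming invented names (MubufFlags,
SelectAddress) and none of the translator's IR types:

    // Minimal standalone sketch of the MUBUF address selection rule.
    #include <cstdio>
    #include <string>

    struct MubufFlags {
        bool idxen; // VGPR index supplied
        bool offen; // VGPR offset supplied
    };

    // soffset_is_dynamic: soffset is not a known zero immediate.
    std::string SelectAddress(MubufFlags m, bool soffset_is_dynamic) {
        if (m.idxen && m.offen)
            return "index VGPR + offset VGPR";
        if (m.idxen && soffset_is_dynamic)
            return "index VGPR + scalar offset"; // soffset substitutes for offen
        if (m.idxen || m.offen)
            return "single VGPR (index or offset)";
        if (soffset_is_dynamic)
            return "scalar offset only";
        return "immediate offset only";
    }

    int main() {
        std::printf("%s\n", SelectAddress({.idxen = true, .offen = false}, true).c_str());
        std::printf("%s\n", SelectAddress({.idxen = false, .offen = false}, true).c_str());
    }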
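
The buffer_cache change has a try-then-fall-back shape: attempt the write through the
backing the cache knows about, and if the target address is not backed (for example
flexible memory), copy directly into the host mapping and skip the cache bookkeeping.
A self-contained sketch of that shape, with an invented MockMemory standing in for the
real MemoryManager:

    // Minimal standalone sketch (not shadPS4's BufferCache).
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <unordered_map>
    #include <vector>

    struct MockMemory {
        // Backed regions the cache mirrors; anything else counts as "flexible" memory here.
        std::unordered_map<uint64_t, std::vector<uint8_t>> backed;

        bool TryWriteBacking(uint64_t addr, const void* data, uint32_t n) {
            auto it = backed.find(addr);
            if (it == backed.end() || it->second.size() < n)
                return false;
            std::memcpy(it->second.data(), data, n);
            return true;
        }
    };

    void InlineData(MockMemory& mem, uint64_t addr, void* host_ptr, const void* value, uint32_t n) {
        if (!mem.TryWriteBacking(addr, value, n)) {
            // Not backed by the cache: write straight into the host mapping and stop.
            std::memcpy(host_ptr, value, n);
            return;
        }
        // ...would continue with cache invalidation for registered regions...
    }

    int main() {
        MockMemory mem;
        uint8_t flexible[8]{};         // stand-in for a flexible-memory mapping
        uint32_t payload = 0x12345678; // inline dword from the command buffer
        InlineData(mem, /*addr=*/0x1000, flexible, &payload, sizeof(payload));
        std::printf("flexible[0..3] = %02X %02X %02X %02X\n",
                    flexible[0], flexible[1], flexible[2], flexible[3]);
    }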