From e5b675d607502bbe8d78204794a95c22ef2c50af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 13 May 2025 22:56:20 +0200 Subject: [PATCH] Handle IT_WAIT_REG_MEM with Register argument (#2927) --- src/core/libraries/gnmdriver/gnmdriver.cpp | 2 +- src/video_core/amdgpu/liverpool.cpp | 6 ++--- src/video_core/amdgpu/pm4_cmds.h | 26 +++++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index f2f40e0e3..9cf340050 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -179,7 +179,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add auto* wait_reg_mem = reinterpret_cast(cmdbuf); wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; wait_reg_mem->raw = (is_mem << 4u) | (cmp_func & 7u); - wait_reg_mem->poll_addr_lo = u32(addr & addr_mask); + wait_reg_mem->poll_addr_lo_raw = u32(addr & addr_mask); wait_reg_mem->poll_addr_hi = u32(addr >> 32u); wait_reg_mem->ref = ref; wait_reg_mem->mask = mask; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 598288085..0fbfa8b9b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -696,10 +696,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); if (vo_port->IsVoLabel(wait_addr) && num_submits == mapped_queues[GfxQueueId].submits.size()) { - vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); + vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); }); break; } - while (!wait_reg_mem->Test()) { + while (!wait_reg_mem->Test(regs.reg_array)) { YIELD_GFX(); } break; @@ -934,7 +934,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq case PM4ItOpcode::WaitRegMem: { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); - while (!wait_reg_mem->Test()) { + while (!wait_reg_mem->Test(regs.reg_array)) { YIELD_ASC(vqid); } break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index cd175f6c9..066fa4b62 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -474,7 +474,12 @@ struct PM4CmdWaitRegMem { BitField<8, 1, Engine> engine; u32 raw; }; - u32 poll_addr_lo; + union { + BitField<0, 16, u32> reg; + BitField<2, 30, u32> poll_addr_lo; + BitField<0, 2, u32> swap; + u32 poll_addr_lo_raw; + }; u32 poll_addr_hi; u32 ref; u32 mask; @@ -485,28 +490,33 @@ struct PM4CmdWaitRegMem { return std::bit_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); } - bool Test() const { + u32 Reg() const { + return reg.Value(); + } + + bool Test(const std::array& regs) const { + u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()]; switch (function.Value()) { case Function::Always: { return true; } case Function::LessThan: { - return (*Address() & mask) < ref; + return (value & mask) < ref; } case Function::LessThanEqual: { - return (*Address() & mask) <= ref; + return (value & mask) <= ref; } case Function::Equal: { - return (*Address() & mask) == ref; + return (value & mask) == ref; } case Function::NotEqual: { - return (*Address() & mask) != ref; + return (value & mask) != ref; } case Function::GreaterThanEqual: { - return (*Address() & mask) >= ref; + return (value & mask) >= ref; } case Function::GreaterThan: { - return (*Address() & mask) > ref; + return (value & mask) > ref; } case Function::Reserved: [[fallthrough]];