Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-06-14 06:33:15 +00:00)
* Import memory
* 64K pages and fix memory mapping
* Queue coverage
* Buffer syncing, faulted readback and BDA in Buffer
* Base DMA implementation
* Preparations for implementing SPV DMA access
* Base impl (pending 16K pages and getbuffersize)
* 16K pages and stack overflow fix
* clang-format
* clang-format but for real this time
* Try to fix macOS build
* Correct decltype
* Add testing log
* Fix stride and patch phi node blocks
* No need to check if it is a deleted buffer
* Clang format once more
* Offset in bytes
* Removed host buffers (may do it in another PR); also some random barrier fixes
* Add IR dumping from my read-const branch
* clang-format
* Correct size instead of end
* Fix incorrect assert
* Possible fix for NieR deadlock
* Copy to avoid deadlock
* Use 2 mutexes instead of copy
* Attempt to range sync error
* Revert "Attempt to range sync error" (reverts commit dd287b48682b50f215680bb0956e39c2809bf3fe)
* Fix size truncated when syncing range, and memory barrier
* Some fixes (and async testing, which doesn't work)
* Use compute to parse fault buffer
* Process faults on submit
* Only sync the first time we see a ReadConst. This is partially wrong: the state should be saved in the submission context itself, not the rasterizer, since we can yield and process another submission (if I'm not misunderstanding)
* Use spec const and 32-bit atomic
* 32-bit counter
* Fix store_index
* Better sync (WIP, breaks PR for now)
* Fixes for better sync
* Better sync
* Remove memory coverage logic
* Point sirit to upstream
* Less waiting and barriers
* Correctly checkout MoltenVK
* Bring back applying pending operations in wait
* Sync the whole buffer instead of only the range
* Implement recursive shared/scoped locks
* Iterators
* Faster syncing with ranges
* Some alignment fixes
* Fix clang-format
* Fix clang-format again
* Port page_manager from readbacks-poc
* clang-format
* Defer memory protect
* Remove RENDERER_TRACE
* Experiment: only sync on first ReadConst
* Added profiling (will be removed)
* Don't sync entire buffers
* Added logging for testing
* Updated temporary workaround to use 4K pages
* clang-format
* Cleanup part 1
* Make ReadConst a SPIR-V function

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
1060 lines
47 KiB
C++
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <boost/preprocessor/stringize.hpp>

#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/polyfill_thread.h"
#include "common/thread.h"
#include "core/debug_state.h"
#include "core/libraries/videoout/driver.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderdoc.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"

namespace AmdGpu {

static const char* dcb_task_name{"DCB_TASK"};
static const char* ccb_task_name{"CCB_TASK"};

#define MAX_NAMES 56
static_assert(Liverpool::NumComputeRings <= MAX_NAMES);

#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n),
#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)}

static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);
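// Via BOOST_PP_REPEAT, the initializer above expands to one stringized label per
// ring: {"ACB_TASK0", "ACB_TASK1", ..., "ACB_TASK55"}.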

#define YIELD(name) \
    FIBER_EXIT; \
    co_yield {}; \
    FIBER_ENTER(name);

#define YIELD_CE() YIELD(ccb_task_name)
#define YIELD_GFX() YIELD(dcb_task_name)
#define YIELD_ASC(id) YIELD(acb_task_name[id])

#define RESUME(task, name) \
    FIBER_EXIT; \
    task.handle.resume(); \
    FIBER_ENTER(name);

#define RESUME_CE(task) RESUME(task, ccb_task_name)
#define RESUME_GFX(task) RESUME(task, dcb_task_name)
#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id])
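
// YIELD suspends the current command-processing coroutine back to Process(), while
// RESUME drives a nested task to its next suspension point. FIBER_ENTER/FIBER_EXIT
// are profiler fiber markers and otherwise compile to nothing.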

std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
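
// Trims the just-processed packet off the submission span; if the packet claims
// more dwords than remain, logs an error and returns an empty span so parsing stops.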
static std::span<const u32> NextPacket(std::span<const u32> span, size_t offset) {
    if (offset > span.size()) {
        LOG_ERROR(Lib_GnmDriver,
                  ": packet length exceeds remaining submission size. Packet dword count={}, "
                  "remaining submission dwords={}",
                  offset, span.size());
        // Return empty subspan so check for next packet bails out
        return {};
    }

    return span.subspan(offset);
}

Liverpool::Liverpool() {
    process_thread = std::jthread{std::bind_front(&Liverpool::Process, this)};
}

Liverpool::~Liverpool() {
    process_thread.request_stop();
    process_thread.join();
}
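
// Worker loop for the GPU command processor thread: waits for work, drains
// high-priority callbacks first, then round-robins over the mapped queues,
// resuming each submission coroutine until it yields or completes.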
void Liverpool::Process(std::stop_token stoken) {
    Common::SetCurrentThreadName("shadPS4:GpuCommandProcessor");

    while (!stoken.stop_requested()) {
        {
            std::unique_lock lk{submit_mutex};
            Common::CondvarWait(submit_cv, lk, stoken,
                                [this] { return num_commands || num_submits || submit_done; });
        }
        if (stoken.stop_requested()) {
            break;
        }

        VideoCore::StartCapture();

        curr_qid = -1;

        while (num_submits || num_commands) {
            // Process incoming commands with high priority
            while (num_commands) {
                Common::UniqueFunction<void> callback{};
                {
                    std::unique_lock lk{submit_mutex};
                    callback = std::move(command_queue.front());
                    command_queue.pop();
                    --num_commands;
                }
                callback();
            }

            curr_qid = (curr_qid + 1) % num_mapped_queues;

            auto& queue = mapped_queues[curr_qid];

            Task::Handle task{};
            {
                std::scoped_lock lock{queue.m_access};
                if (queue.submits.empty()) {
                    continue;
                }
                task = queue.submits.front();
            }
            task.resume();

            if (task.done()) {
                task.destroy();

                std::scoped_lock lock{queue.m_access};
                queue.submits.pop();

                --num_submits;
                std::scoped_lock lock2{submit_mutex};
                submit_cv.notify_all();
            }
        }

        if (submit_done) {
            VideoCore::EndCapture();

            if (rasterizer) {
                rasterizer->ProcessFaults();
                rasterizer->Flush();
            }
            submit_done = false;
        }

        Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
    }
}
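
// Executes a Constant Engine (CE) command buffer. The CE runs ahead of the draw
// engine, staging data through constants_heap; the ce_count/de_count counters keep
// the two engines in step.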
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
    FIBER_ENTER(ccb_task_name);

    while (!ccb.empty()) {
        const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
        const u32 type = header->type;
        if (type != 3) {
            // No other types of packets were spotted so far
            UNREACHABLE_MSG("Invalid PM4 type {}", type);
        }

        const PM4ItOpcode opcode = header->type3.opcode;
        const auto* it_body = reinterpret_cast<const u32*>(header) + 1;
        switch (opcode) {
        case PM4ItOpcode::Nop: {
            const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
            break;
        }
        case PM4ItOpcode::WriteConstRam: {
            const auto* write_const = reinterpret_cast<const PM4WriteConstRam*>(header);
            memcpy(cblock.constants_heap.data() + write_const->Offset(), &write_const->data,
                   write_const->Size());
            break;
        }
        case PM4ItOpcode::DumpConstRam: {
            const auto* dump_const = reinterpret_cast<const PM4DumpConstRam*>(header);
            memcpy(dump_const->Address<void*>(),
                   cblock.constants_heap.data() + dump_const->Offset(), dump_const->Size());
            break;
        }
        case PM4ItOpcode::IncrementCeCounter: {
            ++cblock.ce_count;
            break;
        }
        case PM4ItOpcode::WaitOnDeCounterDiff: {
            const auto diff = it_body[0];
            while ((cblock.de_count - cblock.ce_count) >= diff) {
                YIELD_CE();
            }
            break;
        }
        case PM4ItOpcode::IndirectBufferConst: {
            const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
            auto task =
                ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
            RESUME_CE(task);

            while (!task.handle.done()) {
                YIELD_CE();
                RESUME_CE(task);
            }
            break;
        }
        default:
            const u32 count = header->type3.NumWords();
            UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                            static_cast<u32>(opcode), count);
        }
        ccb = NextPacket(ccb, header->type3.NumWords() + 1);
    }

    FIBER_EXIT;
}
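
// Executes a graphics (DCB) command buffer, optionally alongside a constant engine
// (CCB) buffer that must stay ahead of the draws consuming its updates.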
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
    FIBER_ENTER(dcb_task_name);

    cblock.Reset();

    // TODO: potentially, ASCs also can depend on CE and in this case the
    // CE task should be moved into more global scope
    Task ce_task{};

    if (!ccb.empty()) {
        // In case of CCB provided kick off CE asap to have the constant heap ready to use
        ce_task = ProcessCeUpdate(ccb);
        RESUME_GFX(ce_task);
    }

    const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
    while (!dcb.empty()) {
        const auto* header = reinterpret_cast<const PM4Header*>(dcb.data());
        const u32 type = header->type;

        switch (type) {
        case 0:
        case 1:
            UNREACHABLE_MSG("Unsupported PM4 type {}", type);
            break;
        case 2:
            // Type-2 packets are used for padding purposes
            dcb = NextPacket(dcb, 1);
            continue;
        case 3:
            const u32 count = header->type3.NumWords();
            const PM4ItOpcode opcode = header->type3.opcode;
            switch (opcode) {
            case PM4ItOpcode::Nop: {
                const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
                if (nop->header.count.Value() == 0) {
                    break;
                }

                switch (nop->data_block[0]) {
                case PM4CmdNop::PayloadType::PatchedFlip: {
                    // There is no evidence that GPU CP drives flip events by parsing
                    // special NOP packets. For convenience let's assume that it does.
                    Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
                    break;
                }
                case PM4CmdNop::PayloadType::DebugMarkerPush: {
                    const auto marker_sz = nop->header.count.Value() * 2;
                    const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
                                                 marker_sz};
                    if (rasterizer) {
                        rasterizer->ScopeMarkerBegin(label, true);
                    }
                    break;
                }
                case PM4CmdNop::PayloadType::DebugColorMarkerPush: {
                    const auto marker_sz = nop->header.count.Value() * 2;
                    const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
                                                 marker_sz};
                    const u32 color = *reinterpret_cast<const u32*>(
                        reinterpret_cast<const u8*>(&nop->data_block[1]) + marker_sz);
                    if (rasterizer) {
                        rasterizer->ScopedMarkerInsertColor(label, color, true);
                    }
                    break;
                }
                case PM4CmdNop::PayloadType::DebugMarkerPop: {
                    if (rasterizer) {
                        rasterizer->ScopeMarkerEnd(true);
                    }
                    break;
                }
                default:
                    break;
                }
                break;
            }
            case PM4ItOpcode::ContextControl: {
                break;
            }
            case PM4ItOpcode::ClearState: {
                regs.SetDefaults();
                break;
            }
            case PM4ItOpcode::SetConfigReg: {
                const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
                const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
                const auto* payload = reinterpret_cast<const u32*>(header + 2);
                std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
                break;
            }
            case PM4ItOpcode::SetContextReg: {
                const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
                const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
                const auto* payload = reinterpret_cast<const u32*>(header + 2);

                std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));

                // In the case of HW, render target memory has alignment as color block operates on
                // tiles. There is no information of actual resource extents stored in CB context
                // regs, so any deduction of it from slices/pitch will lead to a larger surface
                // created. The same applies to the depth targets. Fortunately, the guest always
                // sends a trailing NOP packet right after the context regs setup, so we can use the
                // heuristic below and extract the hint to determine actual resource dims.
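                // (0xc0001000 is a PM4 type-3 NOP header, opcode 0x10; the dword
                // that follows it carries the packed extents hint.)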

                switch (reg_addr) {
                case ContextRegs::CbColor0Base:
                case ContextRegs::CbColor1Base:
                case ContextRegs::CbColor2Base:
                case ContextRegs::CbColor3Base:
                case ContextRegs::CbColor4Base:
                case ContextRegs::CbColor5Base:
                case ContextRegs::CbColor6Base:
                case ContextRegs::CbColor7Base: {
                    const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
                                            (ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
                    ASSERT(col_buf_id < NumColorBuffers);

                    const auto nop_offset = header->type3.count;
                    if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
                        ASSERT_MSG(payload[nop_offset] == 0xc0001000,
                                   "NOP hint is missing in CB setup sequence");
                        last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
                    } else {
                        last_cb_extent[col_buf_id].raw = 0;
                    }
                    break;
                }
                case ContextRegs::CbColor0Cmask:
                case ContextRegs::CbColor1Cmask:
                case ContextRegs::CbColor2Cmask:
                case ContextRegs::CbColor3Cmask:
                case ContextRegs::CbColor4Cmask:
                case ContextRegs::CbColor5Cmask:
                case ContextRegs::CbColor6Cmask:
                case ContextRegs::CbColor7Cmask: {
                    const auto col_buf_id =
                        (reg_addr - ContextRegs::CbColor0Cmask) /
                        (ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
                    ASSERT(col_buf_id < NumColorBuffers);

                    const auto nop_offset = header->type3.count;
                    if (nop_offset == 0x04) {
                        ASSERT_MSG(payload[nop_offset] == 0xc0001000,
                                   "NOP hint is missing in CB setup sequence");
                        last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
                    }
                    break;
                }
                case ContextRegs::DbZInfo: {
                    if (header->type3.count == 8) {
                        ASSERT_MSG(payload[20] == 0xc0001000,
                                   "NOP hint is missing in DB setup sequence");
                        last_db_extent.raw = payload[21];
                    } else {
                        last_db_extent.raw = 0;
                    }
                    break;
                }
                default:
                    break;
                }
                break;
            }
            case PM4ItOpcode::SetShReg: {
                const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
                const auto set_size = (count - 1) * sizeof(u32);

                if (set_data->reg_offset >= 0x200 &&
                    set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
                    ASSERT(set_size <= sizeof(ComputeProgram));
                    auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
                                 (set_data->reg_offset - 0x200);
                    std::memcpy(addr, header + 2, set_size);
                } else {
                    std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
                                set_size);
                }
                break;
            }
            case PM4ItOpcode::SetUconfigReg: {
                const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
                std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset],
                            header + 2, (count - 1) * sizeof(u32));
                break;
            }
            case PM4ItOpcode::SetPredication: {
                LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION");
                break;
            }
            case PM4ItOpcode::IndexType: {
                const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
                regs.index_buffer_type.raw = index_type->raw;
                break;
            }
            case PM4ItOpcode::DrawIndex2: {
                const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
                regs.max_index_size = draw_index->max_size;
                regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
                regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
                regs.num_indices = draw_index->index_count;
                regs.draw_initiator = draw_index->draw_initiator;
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndex2", cmd_address));
                    rasterizer->Draw(true);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DrawIndexOffset2: {
                const auto* draw_index_off =
                    reinterpret_cast<const PM4CmdDrawIndexOffset2*>(header);
                regs.max_index_size = draw_index_off->max_size;
                regs.num_indices = draw_index_off->index_count;
                regs.draw_initiator = draw_index_off->draw_initiator;
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(
                        fmt::format("gfx:{}:DrawIndexOffset2", cmd_address));
                    rasterizer->Draw(true, draw_index_off->index_offset);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DrawIndexAuto: {
                const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
                regs.num_indices = draw_index->index_count;
                regs.draw_initiator = draw_index->draw_initiator;
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndexAuto", cmd_address));
                    rasterizer->Draw(false);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DrawIndirect: {
                const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header);
                const auto offset = draw_indirect->data_offset;
                const auto stride = sizeof(DrawIndirectArgs);
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndirect", cmd_address));
                    rasterizer->DrawIndirect(false, indirect_args_addr, offset, stride, 1, 0);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DrawIndexIndirect: {
                const auto* draw_index_indirect =
                    reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header);
                const auto offset = draw_index_indirect->data_offset;
                const auto stride = sizeof(DrawIndexedIndirectArgs);
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(
                        fmt::format("gfx:{}:DrawIndexIndirect", cmd_address));
                    rasterizer->DrawIndirect(true, indirect_args_addr, offset, stride, 1, 0);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DrawIndexIndirectMulti: {
                const auto* draw_index_indirect =
                    reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header);
                const auto offset = draw_index_indirect->data_offset;
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(
                        fmt::format("gfx:{}:DrawIndexIndirectMulti", cmd_address));
                    rasterizer->DrawIndirect(true, indirect_args_addr, offset,
                                             draw_index_indirect->stride,
                                             draw_index_indirect->count, 0);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DrawIndexIndirectCountMulti: {
                const auto* draw_index_indirect =
                    reinterpret_cast<const PM4CmdDrawIndexIndirectCountMulti*>(header);
                const auto offset = draw_index_indirect->data_offset;
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
                }
                if (rasterizer) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(
                        fmt::format("gfx:{}:DrawIndexIndirectCountMulti", cmd_address));
                    rasterizer->DrawIndirect(true, indirect_args_addr, offset,
                                             draw_index_indirect->stride,
                                             draw_index_indirect->count,
                                             draw_index_indirect->count_indirect_enable.Value()
                                                 ? draw_index_indirect->count_addr
                                                 : 0);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DispatchDirect: {
                const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
                auto& cs_program = GetCsRegs();
                cs_program.dim_x = dispatch_direct->dim_x;
                cs_program.dim_y = dispatch_direct->dim_y;
                cs_program.dim_z = dispatch_direct->dim_z;
                cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
                                                   cs_program);
                }
                if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DispatchDirect", cmd_address));
                    rasterizer->DispatchDirect();
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::DispatchIndirect: {
                const auto* dispatch_indirect =
                    reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
                auto& cs_program = GetCsRegs();
                const auto offset = dispatch_indirect->data_offset;
                const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
                if (DebugState.DumpingCurrentReg()) {
                    DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
                                                   cs_program);
                }
                if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                    const auto cmd_address = reinterpret_cast<const void*>(header);
                    rasterizer->ScopeMarkerBegin(
                        fmt::format("gfx:{}:DispatchIndirect", cmd_address));
                    rasterizer->DispatchIndirect(indirect_args_addr, offset, size);
                    rasterizer->ScopeMarkerEnd();
                }
                break;
            }
            case PM4ItOpcode::NumInstances: {
                const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
                regs.num_instances.num_instances = num_instances->num_instances;
                break;
            }
            case PM4ItOpcode::IndexBase: {
                const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
                regs.index_base_address.base_addr_lo = index_base->addr_lo;
                regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
                break;
            }
            case PM4ItOpcode::IndexBufferSize: {
                const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
                regs.num_indices = index_size->num_indices;
                break;
            }
            case PM4ItOpcode::SetBase: {
                const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
                ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
                indirect_args_addr = set_base->Address<u64>();
                break;
            }
            case PM4ItOpcode::EventWrite: {
                const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
                LOG_DEBUG(Render_Vulkan,
                          "Encountered EventWrite: event_type = {}, event_index = {}",
                          magic_enum::enum_name(event->event_type.Value()),
                          magic_enum::enum_name(event->event_index.Value()));
                if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
                    // TODO: handle proper synchronization, for now signal that update is done
                    // immediately
                    regs.cp_strmout_cntl.offset_update_done = 1;
                }
                break;
            }
            case PM4ItOpcode::EventWriteEos: {
                const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
                event_eos->SignalFence([](void* address, u64 data, u32 num_bytes) {
                    auto* memory = Core::Memory::Instance();
                    if (!memory->TryWriteBacking(address, &data, num_bytes)) {
                        memcpy(address, &data, num_bytes);
                    }
                });
                if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) {
                    ASSERT(event_eos->size == 1);
                    if (rasterizer) {
                        rasterizer->Finish();
                        const u32 value = rasterizer->ReadDataFromGds(event_eos->gds_index);
                        *event_eos->Address() = value;
                    }
                }
                break;
            }
            case PM4ItOpcode::EventWriteEop: {
                const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
                event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) {
                    auto* memory = Core::Memory::Instance();
                    if (!memory->TryWriteBacking(address, &data, num_bytes)) {
                        memcpy(address, &data, num_bytes);
                    }
                });
                break;
            }
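            // CPDMA transfers are serviced immediately, routed on the (src_sel,
            // dst_sel) pair: immediate data or memory into GDS, immediate into
            // memory, and memory-to-memory copies; GDS reads are currently ignored.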
            case PM4ItOpcode::DmaData: {
                const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
                if (dma_data->dst_addr_lo == 0x3022C || !rasterizer) {
                    break;
                }
                if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
                    rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32),
                                           true);
                } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                            dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                           dma_data->dst_sel == DmaDataDst::Gds) {
                    rasterizer->InlineData(dma_data->dst_addr_lo,
                                           dma_data->SrcAddress<const void*>(),
                                           dma_data->NumBytes(), true);
                } else if (dma_data->src_sel == DmaDataSrc::Data &&
                           (dma_data->dst_sel == DmaDataDst::Memory ||
                            dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
                    rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
                                           sizeof(u32), false);
                } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                           (dma_data->dst_sel == DmaDataDst::Memory ||
                            dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
                    // LOG_WARNING(Render_Vulkan, "GDS memory read");
                } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                            dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                           (dma_data->dst_sel == DmaDataDst::Memory ||
                            dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
                    rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
                                           dma_data->SrcAddress<const void*>(),
                                           dma_data->NumBytes(), false);
                } else {
                    UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                    u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
                }
                break;
            }
            case PM4ItOpcode::WriteData: {
                const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
                ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
                const u32 data_size = (header->type3.count.Value() - 2) * 4;
                u64* address = write_data->Address<u64*>();
                if (!write_data->wr_one_addr.Value()) {
                    std::memcpy(address, write_data->data, data_size);
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case PM4ItOpcode::MemSemaphore: {
                const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header);
                if (mem_semaphore->IsSignaling()) {
                    mem_semaphore->Signal();
                } else {
                    while (!mem_semaphore->Signaled()) {
                        YIELD_GFX();
                    }
                    mem_semaphore->Decrement();
                }
                break;
            }
            case PM4ItOpcode::AcquireMem: {
                // const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
                break;
            }
            case PM4ItOpcode::Rewind: {
                const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
                while (!rewind->Valid()) {
                    YIELD_GFX();
                }
                break;
            }
            case PM4ItOpcode::WaitRegMem: {
                const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
                // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
                // Optimization: VO label waits are special because the emulator
                // will write to the label when presentation is finished. So if
                // there are no other submits to yield to we can sleep the thread
                // instead and allow other tasks to run.
                const u64* wait_addr = wait_reg_mem->Address<u64*>();
                if (vo_port->IsVoLabel(wait_addr) &&
                    num_submits == mapped_queues[GfxQueueId].submits.size()) {
                    vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
                    break;
                }
                while (!wait_reg_mem->Test(regs.reg_array)) {
                    YIELD_GFX();
                }
                break;
            }
            case PM4ItOpcode::IndirectBuffer: {
                const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
                auto task = ProcessGraphics(
                    {indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, {});
                RESUME_GFX(task);

                while (!task.handle.done()) {
                    YIELD_GFX();
                    RESUME_GFX(task);
                }
                break;
            }
            case PM4ItOpcode::IncrementDeCounter: {
                ++cblock.de_count;
                break;
            }
            case PM4ItOpcode::WaitOnCeCounter: {
                while (cblock.ce_count <= cblock.de_count && !ce_task.handle.done()) {
                    RESUME_GFX(ce_task);
                }
                break;
            }
            case PM4ItOpcode::PfpSyncMe: {
                if (rasterizer) {
                    rasterizer->CpSync();
                }
                break;
            }
            case PM4ItOpcode::StrmoutBufferUpdate: {
                const auto* strmout = reinterpret_cast<const PM4CmdStrmoutBufferUpdate*>(header);
                LOG_WARNING(Render_Vulkan,
                            "Unimplemented IT_STRMOUT_BUFFER_UPDATE, update_memory = {}, "
                            "source_select = {}, buffer_select = {}",
                            strmout->update_memory.Value(),
                            magic_enum::enum_name(strmout->source_select.Value()),
                            strmout->buffer_select.Value());
                break;
            }
            default:
                UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                                static_cast<u32>(opcode), count);
            }
            dcb = NextPacket(dcb, header->type3.NumWords() + 1);
            break;
        }
    }

    if (ce_task.handle) {
        while (!ce_task.handle.done()) {
            RESUME_GFX(ce_task);
        }
        ce_task.handle.destroy();
    }

    FIBER_EXIT;
}
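
// Executes an ACB (compute) command buffer for virtual queue vqid. A packet may
// straddle the ring boundary here, so partial packets are buffered in the queue
// and completed when the next submission arrives.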
template <bool is_indirect>
Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vqid) {
    FIBER_ENTER(acb_task_name[vqid]);
    auto& queue = asc_queues[{vqid}];

    auto base_addr = reinterpret_cast<VAddr>(acb);
    while (acb_dwords > 0) {
        auto* header = reinterpret_cast<const PM4Header*>(acb);
        u32 next_dw_off = header->type3.NumWords() + 1;

        // If we have a buffered packet, use it.
        if (queue.tmp_dwords > 0) [[unlikely]] {
            header = reinterpret_cast<const PM4Header*>(queue.tmp_packet.data());
            next_dw_off = header->type3.NumWords() + 1 - queue.tmp_dwords;
            std::memcpy(queue.tmp_packet.data() + queue.tmp_dwords, acb, next_dw_off * sizeof(u32));
            queue.tmp_dwords = 0;
        }

        // If the packet is split across ring boundary, buffer until next submission
        if (next_dw_off > acb_dwords) [[unlikely]] {
            std::memcpy(queue.tmp_packet.data(), acb, acb_dwords * sizeof(u32));
            queue.tmp_dwords = acb_dwords;
            if constexpr (!is_indirect) {
                *queue.read_addr += acb_dwords;
                *queue.read_addr %= queue.ring_size_dw;
            }
            break;
        }

        if (header->type != 3) {
            // No other types of packets were spotted so far
            UNREACHABLE_MSG("Invalid PM4 type {}", header->type.Value());
        }

        const PM4ItOpcode opcode = header->type3.opcode;
        const auto* it_body = reinterpret_cast<const u32*>(header) + 1;
        switch (opcode) {
        case PM4ItOpcode::Nop: {
            const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
            break;
        }
        case PM4ItOpcode::IndirectBuffer: {
            const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
            auto task = ProcessCompute<true>(indirect_buffer->Address<const u32>(),
                                             indirect_buffer->ib_size, vqid);
            RESUME_ASC(task, vqid);

            while (!task.handle.done()) {
                YIELD_ASC(vqid);
                RESUME_ASC(task, vqid);
            }
            break;
        }
        case PM4ItOpcode::DmaData: {
            const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
            if (dma_data->dst_addr_lo == 0x3022C || !rasterizer) {
                break;
            }
            if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
                rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
            } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                        dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                       dma_data->dst_sel == DmaDataDst::Gds) {
                rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
                                       dma_data->NumBytes(), true);
            } else if (dma_data->src_sel == DmaDataSrc::Data &&
                       (dma_data->dst_sel == DmaDataDst::Memory ||
                        dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
                rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
                                       false);
            } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                       (dma_data->dst_sel == DmaDataDst::Memory ||
                        dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
                // LOG_WARNING(Render_Vulkan, "GDS memory read");
            } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                        dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                       (dma_data->dst_sel == DmaDataDst::Memory ||
                        dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
                rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
                                       dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
                                       false);
            } else {
                UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
            }
            break;
        }
        case PM4ItOpcode::AcquireMem: {
            break;
        }
        case PM4ItOpcode::Rewind: {
            const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
            while (!rewind->Valid()) {
                YIELD_ASC(vqid);
            }
            break;
        }
        case PM4ItOpcode::SetShReg: {
            const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
            const auto set_size = (header->type3.NumWords() - 1) * sizeof(u32);

            if (set_data->reg_offset >= 0x200 &&
                set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
                ASSERT(set_size <= sizeof(ComputeProgram));
                auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
                             (set_data->reg_offset - 0x200);
                std::memcpy(addr, header + 2, set_size);
            } else {
                std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
                            set_size);
            }
            break;
        }
        case PM4ItOpcode::SetQueueReg: {
            const auto* set_data = reinterpret_cast<const PM4CmdSetQueueReg*>(header);
            LOG_WARNING(Render, "Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}",
                        set_data->vqid.Value(), set_data->reg_offset.Value());
            break;
        }
        case PM4ItOpcode::DispatchDirect: {
            const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
            auto& cs_program = GetCsRegs();
            cs_program.dim_x = dispatch_direct->dim_x;
            cs_program.dim_y = dispatch_direct->dim_y;
            cs_program.dim_z = dispatch_direct->dim_z;
            cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
            if (DebugState.DumpingCurrentReg()) {
                DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
                                               cs_program);
            }
            if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                const auto cmd_address = reinterpret_cast<const void*>(header);
                rasterizer->ScopeMarkerBegin(
                    fmt::format("asc[{}]:{}:DispatchDirect", vqid, cmd_address));
                rasterizer->DispatchDirect();
                rasterizer->ScopeMarkerEnd();
            }
            break;
        }
        case PM4ItOpcode::DispatchIndirect: {
            const auto* dispatch_indirect =
                reinterpret_cast<const PM4CmdDispatchIndirectMec*>(header);
            auto& cs_program = GetCsRegs();
            const auto ib_address = dispatch_indirect->Address<VAddr>();
            const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
            if (DebugState.DumpingCurrentReg()) {
                DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
                                               cs_program);
            }
            if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                const auto cmd_address = reinterpret_cast<const void*>(header);
                rasterizer->ScopeMarkerBegin(
                    fmt::format("asc[{}]:{}:DispatchIndirect", vqid, cmd_address));
                rasterizer->DispatchIndirect(ib_address, 0, size);
                rasterizer->ScopeMarkerEnd();
            }
            break;
        }
        case PM4ItOpcode::WriteData: {
            const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
            ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
            const u32 data_size = (header->type3.count.Value() - 2) * 4;
            if (!write_data->wr_one_addr.Value()) {
                std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
            } else {
                UNREACHABLE();
            }
            break;
        }
        case PM4ItOpcode::MemSemaphore: {
            const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header);
            if (mem_semaphore->IsSignaling()) {
                mem_semaphore->Signal();
            } else {
                while (!mem_semaphore->Signaled()) {
                    YIELD_ASC(vqid);
                }
                mem_semaphore->Decrement();
            }
            break;
        }
        case PM4ItOpcode::WaitRegMem: {
            const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
            ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
            while (!wait_reg_mem->Test(regs.reg_array)) {
                YIELD_ASC(vqid);
            }
            break;
        }
        case PM4ItOpcode::ReleaseMem: {
            const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
            release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
            break;
        }
        case PM4ItOpcode::EventWrite: {
            // const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
            break;
        }
        default:
            UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                            static_cast<u32>(opcode), header->type3.NumWords());
        }

        acb += next_dw_off;
        acb_dwords -= next_dw_off;

        if constexpr (!is_indirect) {
            *queue.read_addr += next_dw_off;
            *queue.read_addr %= queue.ring_size_dw;
        }
    }

    FIBER_EXIT;
}
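
// Used when Config::copyGPUCmdBuffers() is enabled: snapshots the submitted spans
// into persistent per-queue storage so the command data stays valid after the
// guest reuses its own buffers.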
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
    std::span<const u32> dcb, std::span<const u32> ccb) {
    auto& queue = mapped_queues[GfxQueueId];

    // std::vector resize can invalidate spans for commands in flight
    ASSERT_MSG(queue.dcb_buffer.capacity() >= queue.dcb_buffer_offset + dcb.size(),
               "dcb copy buffer out of reserved space");
    ASSERT_MSG(queue.ccb_buffer.capacity() >= queue.ccb_buffer_offset + ccb.size(),
               "ccb copy buffer out of reserved space");

    queue.dcb_buffer.resize(
        std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size()));
    queue.ccb_buffer.resize(
        std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size()));

    u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset;
    u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset;
    if (!dcb.empty()) {
        std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(),
                    dcb.size_bytes());
        queue.dcb_buffer_offset += dcb.size();
        dcb = std::span<const u32>{queue.dcb_buffer.begin() + prev_dcb_buffer_offset,
                                   queue.dcb_buffer.begin() + queue.dcb_buffer_offset};
    }

    if (!ccb.empty()) {
        std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(),
                    ccb.size_bytes());
        queue.ccb_buffer_offset += ccb.size();
        ccb = std::span<const u32>{queue.ccb_buffer.begin() + prev_ccb_buffer_offset,
                                   queue.ccb_buffer.begin() + queue.ccb_buffer_offset};
    }

    return std::make_pair(dcb, ccb);
}
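
// Enqueues a graphics submission; the coroutine is not executed here but is
// resumed incrementally by the Process() worker thread.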
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
    auto& queue = mapped_queues[GfxQueueId];

    if (Config::copyGPUCmdBuffers()) {
        std::tie(dcb, ccb) = CopyCmdBuffers(dcb, ccb);
    }

    auto task = ProcessGraphics(dcb, ccb);
    {
        std::scoped_lock lock{queue.m_access};
        queue.submits.emplace(task.handle);
    }

    std::scoped_lock lk{submit_mutex};
    ++num_submits;
    submit_cv.notify_one();
}
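
// Enqueues a compute submission. Queue slot 0 belongs to graphics, so the GNM
// virtual queue id is shifted down by one to address the ASC ring.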
void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
    ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index");
    auto& queue = mapped_queues[gnm_vqid];

    const auto vqid = gnm_vqid - 1;
    const auto& task = ProcessCompute(acb.data(), acb.size(), vqid);
    {
        std::scoped_lock lock{queue.m_access};
        queue.submits.emplace(task.handle);
    }

    std::scoped_lock lk{submit_mutex};
    num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
    ++num_submits;
    submit_cv.notify_one();
}

} // namespace AmdGpu