Mirror of https://github.com/shadps4-emu/shadPS4.git
Synced 2025-07-12 04:35:56 +00:00
Readbacks proof of concept rebased (#3178)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
* Readbacks proof of concept
* liverpool: Use span for acb too
* config: Add readbacks config option
* config: Log readbacks
This commit is contained in:
parent 5789fd881c
commit 0594dac405
17 changed files with 375 additions and 186 deletions
@@ -51,6 +51,7 @@ static bool isShowSplash = false;
 static std::string isSideTrophy = "right";
 static bool isNullGpu = false;
 static bool shouldCopyGPUBuffers = false;
+static bool readbacksEnabled = false;
 static bool shouldDumpShaders = false;
 static bool shouldPatchShaders = true;
 static u32 vblankDivider = 1;
@@ -240,6 +241,10 @@ bool copyGPUCmdBuffers() {
     return shouldCopyGPUBuffers;
 }
 
+bool readbacks() {
+    return readbacksEnabled;
+}
+
 bool dumpShaders() {
     return shouldDumpShaders;
 }
@@ -344,6 +349,10 @@ void setCopyGPUCmdBuffers(bool enable) {
     shouldCopyGPUBuffers = enable;
 }
 
+void setReadbacks(bool enable) {
+    readbacksEnabled = enable;
+}
+
 void setDumpShaders(bool enable) {
     shouldDumpShaders = enable;
 }
@@ -586,6 +595,7 @@ void load(const std::filesystem::path& path) {
     screenHeight = toml::find_or<int>(gpu, "screenHeight", screenHeight);
     isNullGpu = toml::find_or<bool>(gpu, "nullGpu", false);
     shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", false);
+    readbacksEnabled = toml::find_or<bool>(gpu, "readbacks", false);
     shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", false);
     shouldPatchShaders = toml::find_or<bool>(gpu, "patchShaders", true);
     vblankDivider = toml::find_or<int>(gpu, "vblankDivider", 1);
@@ -735,6 +745,7 @@ void save(const std::filesystem::path& path) {
     data["GPU"]["screenHeight"] = screenHeight;
     data["GPU"]["nullGpu"] = isNullGpu;
     data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
+    data["GPU"]["readbacks"] = readbacksEnabled;
     data["GPU"]["dumpShaders"] = shouldDumpShaders;
     data["GPU"]["patchShaders"] = shouldPatchShaders;
     data["GPU"]["vblankDivider"] = vblankDivider;

@@ -45,6 +45,8 @@ bool nullGpu();
 void setNullGpu(bool enable);
 bool copyGPUCmdBuffers();
 void setCopyGPUCmdBuffers(bool enable);
+bool readbacks();
+void setReadbacks(bool enable);
 bool dumpShaders();
 void setDumpShaders(bool enable);
 u32 vblankDiv();
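
The flag is exposed through the usual getter/setter pair above. The consumer that motivates it is the buffer cache further down: before a CPU write invalidates a region, it checks the flag and pulls GPU-modified data back first. Excerpted from the buffer_cache hunk below:

    if (Config::readbacks() && memory_tracker->IsRegionGpuModified(device_addr, size)) {
        ReadMemory(device_addr, size);
    }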

@@ -302,14 +302,15 @@ struct AddressSpace::Impl {
             new_flags = PAGE_READWRITE;
         } else if (read && !write) {
             new_flags = PAGE_READONLY;
-        } else if (execute && !read && not write) {
+        } else if (execute && !read && !write) {
             new_flags = PAGE_EXECUTE;
         } else if (!read && !write && !execute) {
             new_flags = PAGE_NOACCESS;
         } else {
             LOG_CRITICAL(Common_Memory,
-                         "Unsupported protection flag combination for address {:#x}, size {}",
-                         virtual_addr, size);
+                         "Unsupported protection flag combination for address {:#x}, size {}, "
+                         "read={}, write={}, execute={}",
+                         virtual_addr, size, read, write, execute);
             return;
         }
 
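
For context, the branch chain patched above maps read/write/execute booleans onto Win32 page protections; the `not write` to `!write` change is purely stylistic, since `not` is a standard alternative token for `!`. A standalone sketch of the same mapping (the helper name is hypothetical; the PAGE_* constants come from <windows.h>; the real code assigns to `new_flags` and logs on the unsupported path):

    #include <windows.h>

    // Hypothetical helper mirroring the branch chain in AddressSpace::Impl.
    DWORD ToPageProtection(bool read, bool write, bool execute) {
        if (read && write) {
            return PAGE_READWRITE;
        } else if (read && !write) {
            return PAGE_READONLY;
        } else if (execute && !read && !write) {
            return PAGE_EXECUTE;
        } else if (!read && !write && !execute) {
            return PAGE_NOACCESS;
        }
        return 0; // unsupported combination; the real code logs and bails out
    }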

@@ -11,6 +11,7 @@
 namespace Core {
 
 enum class MemoryPermission : u32 {
+    None = 0,
     Read = 1 << 0,
     Write = 1 << 1,
     ReadWrite = Read | Write,

@@ -2834,7 +2834,7 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
     }
 
     if (Config::copyGPUCmdBuffers()) {
-        liverpool->reserveCopyBufferSpace();
+        liverpool->ReserveCopyBufferSpace();
     }
 
     Platform::IrqC::Instance()->Register(Platform::InterruptId::GpuIdle, ResetSubmissionLock,

@@ -132,6 +132,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
     LOG_INFO(Config, "General LogType: {}", Config::getLogType());
     LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
     LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
+    LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
     LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
     LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
     LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());

@@ -72,8 +72,23 @@ Liverpool::~Liverpool() {
     process_thread.join();
 }
 
+void Liverpool::ProcessCommands() {
+    // Process incoming commands with high priority
+    while (num_commands) {
+        Common::UniqueFunction<void> callback{};
+        {
+            std::scoped_lock lk{submit_mutex};
+            callback = std::move(command_queue.front());
+            command_queue.pop();
+            --num_commands;
+        }
+        callback();
+    }
+}
+
 void Liverpool::Process(std::stop_token stoken) {
     Common::SetCurrentThreadName("shadPS4:GpuCommandProcessor");
+    gpu_id = std::this_thread::get_id();
 
     while (!stoken.stop_requested()) {
         {
@@ -90,18 +105,7 @@ void Liverpool::Process(std::stop_token stoken) {
         curr_qid = -1;
 
         while (num_submits || num_commands) {
-            // Process incoming commands with high priority
-            while (num_commands) {
-                Common::UniqueFunction<void> callback{};
-                {
-                    std::unique_lock lk{submit_mutex};
-                    callback = std::move(command_queue.front());
-                    command_queue.pop();
-                    --num_commands;
-                }
-                callback();
-            }
-
+            ProcessCommands();
 
             curr_qid = (curr_qid + 1) % num_mapped_queues;
 
@@ -147,6 +151,8 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
     FIBER_ENTER(ccb_task_name);
 
     while (!ccb.empty()) {
+        ProcessCommands();
+
         const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
         const u32 type = header->type;
         if (type != 3) {
@@ -224,6 +230,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
 
     const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
     while (!dcb.empty()) {
+        ProcessCommands();
+
         const auto* header = reinterpret_cast<const PM4Header*>(dcb.data());
         const u32 type = header->type;
 
@@ -638,9 +646,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                 } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                             dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                            dma_data->dst_sel == DmaDataDst::Gds) {
-                    rasterizer->InlineData(dma_data->dst_addr_lo,
-                                           dma_data->SrcAddress<const void*>(),
-                                           dma_data->NumBytes(), true);
+                    rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
+                                           dma_data->NumBytes(), true, false);
                 } else if (dma_data->src_sel == DmaDataSrc::Data &&
                            (dma_data->dst_sel == DmaDataDst::Memory ||
                             dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
@@ -649,14 +656,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                 } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                            (dma_data->dst_sel == DmaDataDst::Memory ||
                             dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
-                    // LOG_WARNING(Render_Vulkan, "GDS memory read");
+                    rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
+                                           dma_data->NumBytes(), false, true);
                 } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                             dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                            (dma_data->dst_sel == DmaDataDst::Memory ||
                             dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
-                    rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
-                                           dma_data->SrcAddress<const void*>(),
-                                           dma_data->NumBytes(), false);
+                    rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(),
+                                           dma_data->SrcAddress<VAddr>(), dma_data->NumBytes(),
+                                           false, false);
                 } else {
                     UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                     u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
@@ -702,6 +710,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                 break;
             }
             case PM4ItOpcode::Rewind: {
+                if (!rasterizer) {
+                    break;
+                }
                 const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
                 while (!rewind->Valid()) {
                     YIELD_GFX();
@@ -801,29 +812,32 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
 }
 
 template <bool is_indirect>
-Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vqid) {
+Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
     FIBER_ENTER(acb_task_name[vqid]);
     auto& queue = asc_queues[{vqid}];
 
-    auto base_addr = reinterpret_cast<VAddr>(acb);
-    while (acb_dwords > 0) {
-        auto* header = reinterpret_cast<const PM4Header*>(acb);
+    auto base_addr = reinterpret_cast<VAddr>(acb.data());
+    while (!acb.empty()) {
+        ProcessCommands();
+
+        auto* header = reinterpret_cast<const PM4Header*>(acb.data());
         u32 next_dw_off = header->type3.NumWords() + 1;
 
         // If we have a buffered packet, use it.
         if (queue.tmp_dwords > 0) [[unlikely]] {
             header = reinterpret_cast<const PM4Header*>(queue.tmp_packet.data());
             next_dw_off = header->type3.NumWords() + 1 - queue.tmp_dwords;
-            std::memcpy(queue.tmp_packet.data() + queue.tmp_dwords, acb, next_dw_off * sizeof(u32));
+            std::memcpy(queue.tmp_packet.data() + queue.tmp_dwords, acb.data(),
+                        next_dw_off * sizeof(u32));
             queue.tmp_dwords = 0;
         }
 
         // If the packet is split across ring boundary, buffer until next submission
-        if (next_dw_off > acb_dwords) [[unlikely]] {
-            std::memcpy(queue.tmp_packet.data(), acb, acb_dwords * sizeof(u32));
-            queue.tmp_dwords = acb_dwords;
+        if (next_dw_off > acb.size()) [[unlikely]] {
+            std::memcpy(queue.tmp_packet.data(), acb.data(), acb.size_bytes());
+            queue.tmp_dwords = acb.size();
             if constexpr (!is_indirect) {
-                *queue.read_addr += acb_dwords;
+                *queue.read_addr += acb.size();
                 *queue.read_addr %= queue.ring_size_dw;
             }
             break;
@@ -832,9 +846,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
         if (header->type == 2) {
             // Type-2 packet are used for padding purposes
            next_dw_off = 1;
-            acb += next_dw_off;
-            acb_dwords -= next_dw_off;
+            acb = NextPacket(acb, next_dw_off);
 
             if constexpr (!is_indirect) {
                 *queue.read_addr += next_dw_off;
                 *queue.read_addr %= queue.ring_size_dw;
@@ -856,8 +868,8 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
         }
         case PM4ItOpcode::IndirectBuffer: {
             const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
-            auto task = ProcessCompute<true>(indirect_buffer->Address<const u32>(),
-                                             indirect_buffer->ib_size, vqid);
+            auto task = ProcessCompute<true>(
+                {indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
             RESUME_ASC(task, vqid);
 
             while (!task.handle.done()) {
@@ -876,8 +888,8 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
             } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                         dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                        dma_data->dst_sel == DmaDataDst::Gds) {
-                rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
-                                       dma_data->NumBytes(), true);
+                rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
+                                       dma_data->NumBytes(), true, false);
             } else if (dma_data->src_sel == DmaDataSrc::Data &&
                        (dma_data->dst_sel == DmaDataDst::Memory ||
                         dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
@@ -886,14 +898,14 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
             } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                        (dma_data->dst_sel == DmaDataDst::Memory ||
                         dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
-                // LOG_WARNING(Render_Vulkan, "GDS memory read");
+                rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
+                                       dma_data->NumBytes(), false, true);
             } else if ((dma_data->src_sel == DmaDataSrc::Memory ||
                         dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                        (dma_data->dst_sel == DmaDataDst::Memory ||
                         dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
-                rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
-                                       dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
-                                       false);
+                rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->SrcAddress<VAddr>(),
+                                       dma_data->NumBytes(), false, false);
             } else {
                 UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                 u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
@@ -904,6 +916,9 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
             break;
         }
         case PM4ItOpcode::Rewind: {
+            if (!rasterizer) {
+                break;
+            }
             const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
             while (!rewind->Valid()) {
                 YIELD_ASC(vqid);
@@ -1016,8 +1031,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
                             static_cast<u32>(opcode), header->type3.NumWords());
         }
 
-        acb += next_dw_off;
-        acb_dwords -= next_dw_off;
+        acb = NextPacket(acb, next_dw_off);
 
         if constexpr (!is_indirect) {
             *queue.read_addr += next_dw_off;
@@ -1087,7 +1101,7 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
     auto& queue = mapped_queues[gnm_vqid];
 
     const auto vqid = gnm_vqid - 1;
-    const auto& task = ProcessCompute(acb.data(), acb.size(), vqid);
+    const auto& task = ProcessCompute(acb, vqid);
     {
         std::scoped_lock lock{queue.m_access};
         queue.submits.emplace(task.handle);
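
All four DmaData routes now funnel into the single CopyBuffer entry point added to the buffer cache below, with two trailing booleans encoding whether each endpoint is GDS or guest memory. A toy stand-in that only prints the decoded direction (the free function and the addresses are made up for illustration):

    #include <cstdint>
    #include <cstdio>

    // Toy stand-in for Rasterizer::CopyBuffer, just to show the flag encoding.
    void CopyBuffer(std::uint64_t dst, std::uint64_t src, std::uint32_t bytes,
                    bool dst_gds, bool src_gds) {
        std::printf("%s -> %s, %u bytes\n", src_gds ? "GDS" : "memory",
                    dst_gds ? "GDS" : "memory", bytes);
    }

    int main() {
        CopyBuffer(0x10, 0x2000, 64, true, false);    // memory -> GDS
        CopyBuffer(0x2000, 0x10, 64, false, true);    // GDS -> memory
        CopyBuffer(0x3000, 0x2000, 64, false, false); // memory -> memory
    }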

@@ -8,6 +8,7 @@
 #include <coroutine>
 #include <exception>
 #include <mutex>
+#include <semaphore>
 #include <span>
 #include <thread>
 #include <vector>
@@ -1512,14 +1513,32 @@ public:
         rasterizer = rasterizer_;
     }
 
-    void SendCommand(Common::UniqueFunction<void>&& func) {
-        std::scoped_lock lk{submit_mutex};
-        command_queue.emplace(std::move(func));
-        ++num_commands;
-        submit_cv.notify_one();
+    template <bool wait_done = false>
+    void SendCommand(auto&& func) {
+        if (std::this_thread::get_id() == gpu_id) {
+            return func();
+        }
+        if constexpr (wait_done) {
+            std::binary_semaphore sem{0};
+            {
+                std::scoped_lock lk{submit_mutex};
+                command_queue.emplace([&sem, &func] {
+                    func();
+                    sem.release();
+                });
+                ++num_commands;
+                submit_cv.notify_one();
+            }
+            sem.acquire();
+        } else {
+            std::scoped_lock lk{submit_mutex};
+            command_queue.emplace(std::move(func));
+            ++num_commands;
+            submit_cv.notify_one();
+        }
     }
 
-    void reserveCopyBufferSpace() {
+    void ReserveCopyBufferSpace() {
         GpuQueue& gfx_queue = mapped_queues[GfxQueueId];
         std::scoped_lock<std::mutex> lk(gfx_queue.m_access);
 
@@ -1581,8 +1600,9 @@ private:
     Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
     Task ProcessCeUpdate(std::span<const u32> ccb);
     template <bool is_indirect = false>
-    Task ProcessCompute(const u32* acb, u32 acb_dwords, u32 vqid);
+    Task ProcessCompute(std::span<const u32> acb, u32 vqid);
 
+    void ProcessCommands();
     void Process(std::stop_token stoken);
 
     struct GpuQueue {
@@ -1626,6 +1646,7 @@ private:
     std::mutex submit_mutex;
     std::condition_variable_any submit_cv;
     std::queue<Common::UniqueFunction<void>> command_queue{};
+    std::thread::id gpu_id;
     int curr_qid{-1};
 };
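
The `wait_done` path above is what lets BufferCache::ReadMemory block until the GPU thread has finished the download, while the `gpu_id` check avoids self-deadlock when SendCommand is invoked from the GPU thread itself. A self-contained sketch of the semaphore round-trip (queue and thread names are made up):

    #include <functional>
    #include <mutex>
    #include <queue>
    #include <semaphore>
    #include <thread>

    int main() {
        std::queue<std::function<void()>> commands;
        std::mutex mutex;
        std::binary_semaphore done{0};

        {
            std::scoped_lock lk{mutex};
            commands.emplace([&done] {
                // ... perform the readback work on the GPU thread ...
                done.release();
            });
        }
        std::thread gpu_thread([&] {
            std::function<void()> callback;
            {
                std::scoped_lock lk{mutex};
                callback = std::move(commands.front());
                commands.pop();
            }
            callback();
        });
        done.acquire(); // the caller blocks here until the command has executed
        gpu_thread.join();
    }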

@@ -3,12 +3,14 @@
 
 #include <algorithm>
 #include "common/alignment.h"
+#include "common/config.h"
 #include "common/debug.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
 #include "core/memory.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/buffer_cache/memory_tracker.h"
 #include "video_core/host_shaders/fault_buffer_process_comp.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
@@ -27,10 +29,10 @@ static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
-                         TextureCache& texture_cache_, PageManager& tracker_)
-    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
-      memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
+                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
+                         PageManager& tracker)
+    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+      memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
@@ -38,13 +40,14 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
       bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
                            0, AllFlags, BDA_PAGETABLE_SIZE},
-      fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE),
-      memory_tracker{tracker} {
+      fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE) {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
     Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
                           "BDA Page Table Buffer");
     Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(), "Fault Buffer");
 
+    memory_tracker = std::make_unique<MemoryTracker>(tracker);
+
     // Ensure the first slot is used for the null buffer
     const auto null_id =
         slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);
@@ -129,22 +132,27 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
 
 BufferCache::~BufferCache() = default;
 
-void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool unmap) {
-    const bool is_tracked = IsRegionRegistered(device_addr, size);
-    if (is_tracked) {
-        // Mark the page as CPU modified to stop tracking writes.
-        memory_tracker.MarkRegionAsCpuModified(device_addr, size);
-
-        if (unmap) {
-            return;
-        }
-    }
-}
+void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
+    if (!IsRegionRegistered(device_addr, size)) {
+        return;
+    }
+    if (Config::readbacks() && memory_tracker->IsRegionGpuModified(device_addr, size)) {
+        ReadMemory(device_addr, size);
+    }
+    memory_tracker->MarkRegionAsCpuModified(device_addr, size);
+}
+
+void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
+    liverpool->SendCommand<true>([this, device_addr, size] {
+        Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)];
+        DownloadBufferMemory(buffer, device_addr, size);
+    });
+}
 
 void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
     boost::container::small_vector<vk::BufferCopy, 1> copies;
     u64 total_size_bytes = 0;
-    memory_tracker.ForEachDownloadRange<true>(
+    memory_tracker->ForEachDownloadRange<false>(
         device_addr, size, [&](u64 device_addr_out, u64 range_size) {
             const VAddr buffer_addr = buffer.CpuAddr();
             const auto add_download = [&](VAddr start, VAddr end) {
@@ -155,7 +163,10 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
                     .dstOffset = total_size_bytes,
                     .size = new_size,
                 });
-                total_size_bytes += new_size;
+                // Align up to avoid cache conflicts
+                constexpr u64 align = 64ULL;
+                constexpr u64 mask = ~(align - 1ULL);
+                total_size_bytes += (new_size + align - 1) & mask;
            };
             gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
             gpu_modified_ranges.Subtract(device_addr_out, range_size);
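
The accumulation above now rounds every copy size up to a 64-byte boundary before advancing the staging offset: the standard align-up idiom `(n + align - 1) & ~(align - 1)`. A quick runnable check of the arithmetic:

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint64_t align = 64;
        constexpr std::uint64_t mask = ~(align - 1);
        for (std::uint64_t new_size : {1ull, 64ull, 65ull, 200ull}) {
            std::printf("%llu -> %llu\n",
                        static_cast<unsigned long long>(new_size),
                        static_cast<unsigned long long>((new_size + align - 1) & mask));
        }
        // prints: 1 -> 64, 64 -> 64, 65 -> 128, 200 -> 256
    }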

@@ -173,11 +184,14 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     const auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies);
     scheduler.Finish();
+    auto* memory = Core::Memory::Instance();
     for (const auto& copy : copies) {
         const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
         const u64 dst_offset = copy.dstOffset - offset;
-        std::memcpy(std::bit_cast<u8*>(copy_device_addr), download + dst_offset, copy.size);
+        memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), download + dst_offset,
+                                copy.size);
     }
+    memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
 }
 
 void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
@@ -296,9 +310,11 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
 
 void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
     ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
-    if (!is_gds && !IsRegionGpuModified(address, num_bytes)) {
-        memcpy(std::bit_cast<void*>(address), value, num_bytes);
-        return;
+    if (!is_gds) {
+        ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes));
+        if (!IsRegionRegistered(address, num_bytes)) {
+            return;
+        }
     }
     Buffer* buffer = [&] {
         if (is_gds) {
@@ -326,25 +342,108 @@ void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, boo
     WriteDataBuffer(*buffer, address, value, num_bytes);
 }
 
+void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
+    if (!dst_gds && !IsRegionGpuModified(dst, num_bytes)) {
+        if (!src_gds && !IsRegionGpuModified(src, num_bytes)) {
+            // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
+            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
+            return;
+        }
+        // Without a readback there's nothing we can do with this
+        // Fallback to creating dst buffer on GPU to at least have this data there
+    }
+    auto& src_buffer = [&] -> const Buffer& {
+        if (src_gds) {
+            return gds_buffer;
+        }
+        // Avoid using ObtainBuffer here as that might give us the stream buffer.
+        const BufferId buffer_id = FindBuffer(src, num_bytes);
+        auto& buffer = slot_buffers[buffer_id];
+        SynchronizeBuffer(buffer, src, num_bytes, false);
+        return buffer;
+    }();
+    auto& dst_buffer = [&] -> const Buffer& {
+        if (dst_gds) {
+            return gds_buffer;
+        }
+        // Prefer using ObtainBuffer here as that will auto-mark the region as GPU modified.
+        const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true);
+        return *buffer;
+    }();
+    vk::BufferCopy region{
+        .srcOffset = src_buffer.Offset(src),
+        .dstOffset = dst_buffer.Offset(dst),
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .buffer = dst_buffer.Handle(),
+            .offset = dst_buffer.Offset(dst),
+            .size = num_bytes,
+        },
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
+            .buffer = src_buffer.Handle(),
+            .offset = src_buffer.Offset(src),
+            .size = num_bytes,
+        },
+    };
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 2,
+        .pBufferMemoryBarriers = buf_barriers_before,
+    });
+    cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);
+    const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
+            .buffer = dst_buffer.Handle(),
+            .offset = dst_buffer.Offset(dst),
+            .size = num_bytes,
+        },
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+            .buffer = src_buffer.Handle(),
+            .offset = src_buffer.Offset(src),
+            .size = num_bytes,
+        },
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 2,
+        .pBufferMemoryBarriers = buf_barriers_after,
+    });
+}
+
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
                                                   bool is_texel_buffer, BufferId buffer_id) {
-    // For small uniform buffers that have not been modified by gpu
-    // use device local stream buffer to reduce renderpass breaks.
-    // Maybe we want to modify the threshold now that the page size is 16KB?
-    static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
-    const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
-    if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
+    // For read-only buffers use device local stream buffer to reduce renderpass breaks.
+    if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size)) {
         const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
         return {&stream_buffer, offset};
     }
-    if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
+    if (IsBufferInvalid(buffer_id)) {
        buffer_id = FindBuffer(device_addr, size);
     }
     Buffer& buffer = slot_buffers[buffer_id];
     SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
     if (is_written) {
-        memory_tracker.MarkRegionAsGpuModified(device_addr, size);
+        memory_tracker->MarkRegionAsGpuModified(device_addr, size);
         gpu_modified_ranges.Add(device_addr, size);
     }
     return {&buffer, buffer.Offset(device_addr)};
@@ -352,21 +451,17 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
 
 std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 size) {
     // Check if any buffer contains the full requested range.
-    const u64 page = gpu_addr >> CACHING_PAGEBITS;
-    const BufferId buffer_id = page_table[page].buffer_id;
+    const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS].buffer_id;
     if (buffer_id) {
-        Buffer& buffer = slot_buffers[buffer_id];
-        if (buffer.IsInBounds(gpu_addr, size)) {
+        if (Buffer& buffer = slot_buffers[buffer_id]; buffer.IsInBounds(gpu_addr, size)) {
             SynchronizeBuffer(buffer, gpu_addr, size, false);
             return {&buffer, buffer.Offset(gpu_addr)};
         }
     }
-    // If no buffer contains the full requested range but some buffer within was GPU-modified,
-    // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
-    if (memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
+    // If some buffer within was GPU modified create a full buffer to avoid losing GPU data.
+    if (IsRegionGpuModified(gpu_addr, size)) {
         return ObtainBuffer(gpu_addr, size, false, false);
     }
 
     // In all other cases, just do a CPU copy to the staging buffer.
     const auto [data, offset] = staging_buffer.Map(size, 16);
     memory->CopySparseMemory(gpu_addr, data, size);
@@ -380,11 +475,11 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
 }
 
 bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
-    return memory_tracker.IsRegionCpuModified(addr, size);
+    return memory_tracker->IsRegionCpuModified(addr, size);
 }
 
 bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
-    return memory_tracker.IsRegionGpuModified(addr, size);
+    return memory_tracker->IsRegionGpuModified(addr, size);
 }
 
 BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
@@ -723,7 +818,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
     boost::container::small_vector<vk::BufferCopy, 4> copies;
     u64 total_size_bytes = 0;
     VAddr buffer_start = buffer.CpuAddr();
-    memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
+    memory_tracker->ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
         copies.push_back(vk::BufferCopy{
             .srcOffset = total_size_bytes,
             .dstOffset = device_addr_out - buffer_start,

@@ -9,7 +9,6 @@
 #include "common/slot_vector.h"
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
-#include "video_core/buffer_cache/memory_tracker.h"
 #include "video_core/buffer_cache/range_set.h"
 #include "video_core/multi_level_page_table.h"
 
@@ -21,13 +20,6 @@ namespace Core {
 class MemoryManager;
 }
 
-namespace Shader {
-namespace Gcn {
-struct FetchShaderData;
-}
-struct Info;
-} // namespace Shader
-
 namespace Vulkan {
 class GraphicsPipeline;
 }
@@ -39,6 +31,8 @@ using BufferId = Common::SlotId;
 static constexpr BufferId NULL_BUFFER_ID{0};
 
 class TextureCache;
+class MemoryTracker;
+class PageManager;
 
 class BufferCache {
 public:
@@ -69,10 +63,16 @@ public:
         bool has_stream_leap = false;
     };
 
+    using IntervalSet =
+        boost::icl::interval_set<VAddr, std::less,
+                                 ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
+                                 RangeSetsAllocator>;
+    using IntervalType = typename IntervalSet::interval_type;
+
 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool,
-                         TextureCache& texture_cache, PageManager& tracker);
+                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
+                         PageManager& tracker);
     ~BufferCache();
 
     /// Returns a pointer to GDS device local buffer.
@@ -110,7 +110,10 @@ public:
     }
 
     /// Invalidates any buffer in the logical page range.
-    void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
+    void InvalidateMemory(VAddr device_addr, u64 size);
+
+    /// Waits on pending downloads in the logical page range.
+    void ReadMemory(VAddr device_addr, u64 size);
 
     /// Binds host vertex buffers for the current draw.
     void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -124,6 +127,9 @@ public:
     /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
     void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
+    /// Performs buffer to buffer data copy on the GPU.
+    void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
+
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                        bool is_texel_buffer = false,
@@ -166,6 +172,10 @@ private:
         });
     }
 
+    inline bool IsBufferInvalid(BufferId buffer_id) const {
+        return !buffer_id || slot_buffers[buffer_id].is_deleted;
+    }
+
     void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
 
     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
@@ -193,11 +203,10 @@ private:
 
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
-    Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     TextureCache& texture_cache;
-    PageManager& tracker;
+    std::unique_ptr<MemoryTracker> memory_tracker;
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     StreamBuffer download_buffer;
@@ -209,7 +218,6 @@ private:
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;
     SplitRangeMap<BufferId> buffer_ranges;
-    MemoryTracker memory_tracker;
     PageTable page_table;
     vk::UniqueDescriptorSetLayout fault_process_desc_layout;
     vk::UniquePipeline fault_process_pipeline;

@@ -57,6 +57,14 @@ public:
         });
     }
 
+    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
+        IteratePages<false>(dirty_cpu_addr, query_size,
+                            [](RegionManager* manager, u64 offset, size_t size) {
+                                manager->template ChangeRegionState<Type::GPU, false>(
+                                    manager->GetCpuAddr() + offset, size);
+                            });
+    }
+
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
         IteratePages<true>(query_cpu_range, query_size,

@@ -3,7 +3,6 @@
 
 #pragma once
 
-#include <array>
 #include "common/bit_array.h"
 #include "common/types.h"
 
@@ -20,9 +19,8 @@ constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PE
 enum class Type {
     CPU,
     GPU,
-    Writeable,
 };
 
 using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;
 
 } // namespace VideoCore
@ -4,7 +4,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <utility>
|
#include "common/config.h"
|
||||||
#include "common/div_ceil.h"
|
#include "common/div_ceil.h"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
|
@ -20,7 +20,7 @@
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows tracking CPU and GPU modification of pages in a contigious 4MB virtual address region.
|
* Allows tracking CPU and GPU modification of pages in a contigious 16MB virtual address region.
|
||||||
* Information is stored in bitsets for spacial locality and fast update of single pages.
|
* Information is stored in bitsets for spacial locality and fast update of single pages.
|
||||||
*/
|
*/
|
||||||
class RegionManager {
|
class RegionManager {
|
||||||
|
@ -30,6 +30,7 @@ public:
|
||||||
cpu.Fill();
|
cpu.Fill();
|
||||||
gpu.Clear();
|
gpu.Clear();
|
||||||
writeable.Fill();
|
writeable.Fill();
|
||||||
|
readable.Fill();
|
||||||
}
|
}
|
||||||
explicit RegionManager() = default;
|
explicit RegionManager() = default;
|
||||||
|
|
||||||
|
@ -47,29 +48,19 @@ public:
|
||||||
|
|
||||||
template <Type type>
|
template <Type type>
|
||||||
RegionBits& GetRegionBits() noexcept {
|
RegionBits& GetRegionBits() noexcept {
|
||||||
static_assert(type != Type::Writeable);
|
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
return cpu;
|
return cpu;
|
||||||
} else if constexpr (type == Type::GPU) {
|
} else if constexpr (type == Type::GPU) {
|
||||||
return gpu;
|
return gpu;
|
||||||
} else if constexpr (type == Type::Writeable) {
|
|
||||||
return writeable;
|
|
||||||
} else {
|
|
||||||
static_assert(false, "Invalid type");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <Type type>
|
template <Type type>
|
||||||
const RegionBits& GetRegionBits() const noexcept {
|
const RegionBits& GetRegionBits() const noexcept {
|
||||||
static_assert(type != Type::Writeable);
|
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
return cpu;
|
return cpu;
|
||||||
} else if constexpr (type == Type::GPU) {
|
} else if constexpr (type == Type::GPU) {
|
||||||
return gpu;
|
return gpu;
|
||||||
} else if constexpr (type == Type::Writeable) {
|
|
||||||
return writeable;
|
|
||||||
} else {
|
|
||||||
static_assert(false, "Invalid type");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,7 +81,6 @@ public:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
std::scoped_lock lk{lock};
|
std::scoped_lock lk{lock};
|
||||||
static_assert(type != Type::Writeable);
|
|
||||||
|
|
||||||
RegionBits& bits = GetRegionBits<type>();
|
RegionBits& bits = GetRegionBits<type>();
|
||||||
if constexpr (enable) {
|
if constexpr (enable) {
|
||||||
|
@ -99,7 +89,9 @@ public:
|
||||||
bits.UnsetRange(start_page, end_page);
|
bits.UnsetRange(start_page, end_page);
|
||||||
}
|
}
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
UpdateProtection<!enable>();
|
UpdateProtection<!enable, false>();
|
||||||
|
} else if (Config::readbacks()) {
|
||||||
|
UpdateProtection<enable, true>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -122,16 +114,10 @@ public:
             return;
         }
         std::scoped_lock lk{lock};
-        static_assert(type != Type::Writeable);
 
         RegionBits& bits = GetRegionBits<type>();
         RegionBits mask(bits, start_page, end_page);
 
-        // TODO: this will not be needed once we handle readbacks
-        if constexpr (type == Type::GPU) {
-            mask &= ~writeable;
-        }
-
         for (const auto& [start, end] : mask) {
             func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE);
         }
@@ -139,7 +125,9 @@ public:
         if constexpr (clear) {
            bits.UnsetRange(start_page, end_page);
             if constexpr (type == Type::CPU) {
-                UpdateProtection<true>();
+                UpdateProtection<true, false>();
+            } else if (Config::readbacks()) {
+                UpdateProtection<false, true>();
             }
         }
     }
@@ -151,7 +139,7 @@ public:
      * @param size Size in bytes of the region to query for modifications
      */
     template <Type type>
-    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
+    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) noexcept {
         RENDERER_TRACE;
         const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
         const size_t end_page =
@@ -159,17 +147,10 @@ public:
         if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
             return false;
         }
-        // std::scoped_lock lk{lock}; // Is this needed?
-        static_assert(type != Type::Writeable);
+        std::scoped_lock lk{lock};
 
         const RegionBits& bits = GetRegionBits<type>();
         RegionBits test(bits, start_page, end_page);
 
-        // TODO: this will not be needed once we handle readbacks
-        if constexpr (type == Type::GPU) {
-            test &= ~writeable;
-        }
-
         return test.Any();
     }
 
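Two related fixes land in IsRegionModified: the previously commented-out lock is now actually taken, and const is dropped from the signature because locking a member mutex mutates it. The two usual ways out of that conflict look like this (a sketch, not the project's code):

    #include <mutex>

    struct Example {
        // As in the diff: take the lock, drop const from the method.
        bool AnyModified() noexcept {
            std::scoped_lock lk{lock};
            return dirty != 0;
        }

        // Equivalent alternative: keep the method const by declaring the
        // mutex mutable, since locking is logically non-modifying.
        bool AnyModifiedConst() const noexcept {
            std::scoped_lock lk{const_lock};
            return dirty != 0;
        }

        int dirty = 0;
        std::mutex lock;
        mutable std::mutex const_lock;
    };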
@@ -181,19 +162,21 @@ private:
      * @param current_bits Current state of the word
      * @param new_bits New state of the word
      *
-     * @tparam add_to_tracker True when the tracker should start tracking the new pages
+     * @tparam track True when the tracker should start tracking the new pages
      */
-    template <bool add_to_tracker>
+    template <bool track, bool is_read>
     void UpdateProtection() {
         RENDERER_TRACE;
-        RegionBits mask = cpu ^ writeable;
+        RegionBits mask = is_read ? (~gpu ^ readable) : (cpu ^ writeable);
 
         if (mask.None()) {
-            return; // No changes to the CPU tracking state
+            return;
         }
-        writeable = cpu;
-        tracker->UpdatePageWatchersForRegion<add_to_tracker>(cpu_addr, mask);
+        if constexpr (is_read) {
+            readable = ~gpu;
+        } else {
+            writeable = cpu;
+        }
+        tracker->UpdatePageWatchersForRegion<track, is_read>(cpu_addr, mask);
     }
 
 #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
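The heart of UpdateProtection is the XOR: the invariant is writeable == cpu for write tracking and readable == ~gpu for the new read tracking, so XOR-ing the desired set against the current one yields exactly the pages whose host protection is stale, and only those are handed to the page watcher update. A worked toy example:

    #include <bitset>
    #include <iostream>

    int main() {
        // Desired: writeable mirrors cpu (a page stays host-writeable only
        // while already marked CPU-dirty). XOR exposes out-of-sync pages.
        const std::bitset<8> cpu{"10110010"};
        const std::bitset<8> writeable{"11110000"};

        const std::bitset<8> mask = cpu ^ writeable;
        std::cout << mask << '\n'; // 01000010: only these get reprotected

        // Read tracking works the same way against the complement of gpu:
        const std::bitset<8> gpu{"00001100"};
        const std::bitset<8> readable{"11110011"};
        std::cout << (~gpu ^ readable) << '\n'; // 00000000: already in sync
    }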
@@ -206,6 +189,7 @@ private:
     RegionBits cpu;
     RegionBits gpu;
     RegionBits writeable;
+    RegionBits readable;
 };
 
 } // namespace VideoCore

@@ -13,6 +13,7 @@
 
 #ifndef _WIN64
 #include <sys/mman.h>
+#include "common/adaptive_mutex.h"
 #ifdef ENABLE_USERFAULTFD
 #include <thread>
 #include <fcntl.h>
@@ -23,6 +24,7 @@
 #endif
 #else
 #include <windows.h>
+#include "common/spin_lock.h"
 #endif
 
 #ifdef __linux__
@@ -38,22 +40,45 @@ constexpr size_t PAGE_BITS = 12;
 
 struct PageManager::Impl {
     struct PageState {
-        u8 num_watchers{};
+        u8 num_write_watchers : 7;
+        // At the moment only buffer cache can request read watchers.
+        // And buffers cannot overlap, thus only 1 can exist per page.
+        u8 num_read_watchers : 1;
 
-        Core::MemoryPermission Perm() const noexcept {
-            return num_watchers == 0 ? Core::MemoryPermission::ReadWrite
-                                     : Core::MemoryPermission::Read;
+        Core::MemoryPermission WritePerm() const noexcept {
+            return num_write_watchers == 0 ? Core::MemoryPermission::Write
+                                           : Core::MemoryPermission::None;
         }
 
-        template <s32 delta>
+        Core::MemoryPermission ReadPerm() const noexcept {
+            return num_read_watchers == 0 ? Core::MemoryPermission::Read
+                                          : Core::MemoryPermission::None;
+        }
+
+        Core::MemoryPermission Perms() const noexcept {
+            return ReadPerm() | WritePerm();
+        }
+
+        template <s32 delta, bool is_read>
         u8 AddDelta() {
-            if constexpr (delta == 1) {
-                return ++num_watchers;
-            } else if constexpr (delta == -1) {
-                ASSERT_MSG(num_watchers > 0, "Not enough watchers");
-                return --num_watchers;
+            if constexpr (is_read) {
+                if constexpr (delta == 1) {
+                    return ++num_read_watchers;
+                } else if (delta == -1) {
+                    ASSERT_MSG(num_read_watchers > 0, "Not enough watchers");
+                    return --num_read_watchers;
+                } else {
+                    return num_read_watchers;
+                }
             } else {
-                return num_watchers;
+                if constexpr (delta == 1) {
+                    return ++num_write_watchers;
+                } else if (delta == -1) {
+                    ASSERT_MSG(num_write_watchers > 0, "Not enough watchers");
+                    return --num_write_watchers;
+                } else {
+                    return num_write_watchers;
+                }
             }
         }
     };
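The watcher counters for both fault kinds are packed into a single byte: seven bits count write watchers, and per the comment in the hunk above, one bit suffices for read watchers because at most one buffer, and therefore one read watcher, can cover a page. A compilable sketch of the layout and the permission derivation (the Perm enum stands in for the project's MemoryPermission flags):

    #include <cassert>
    #include <cstdint>

    enum class Perm : std::uint8_t { None = 0, Read = 1, Write = 2 };
    constexpr Perm operator|(Perm a, Perm b) {
        return static_cast<Perm>(static_cast<std::uint8_t>(a) |
                                 static_cast<std::uint8_t>(b));
    }

    struct PageState {
        std::uint8_t num_write_watchers : 7; // up to 127 overlapping write watchers
        std::uint8_t num_read_watchers : 1;  // at most one buffer per page

        Perm WritePerm() const { return num_write_watchers == 0 ? Perm::Write : Perm::None; }
        Perm ReadPerm() const { return num_read_watchers == 0 ? Perm::Read : Perm::None; }
        Perm Perms() const { return ReadPerm() | WritePerm(); }
    };

    int main() {
        static_assert(sizeof(PageState) == 1, "both counters share one byte");
        PageState s{};
        assert(s.Perms() == (Perm::Read | Perm::Write)); // unwatched: full access
        s.num_read_watchers = 1;
        assert(s.Perms() == Perm::Write); // read-watched: reads must fault
    }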
@@ -176,6 +201,7 @@ struct PageManager::Impl {
         RENDERER_TRACE;
         auto* memory = Core::Memory::Instance();
         auto& impl = memory->GetAddressSpace();
+        // ASSERT(perms != Core::MemoryPermission::Write);
         impl.Protect(address, size, perms);
     }
 
@@ -183,12 +209,14 @@ struct PageManager::Impl {
         const auto addr = reinterpret_cast<VAddr>(fault_address);
         if (Common::IsWriteError(context)) {
             return rasterizer->InvalidateMemory(addr, 1);
+        } else {
+            return rasterizer->ReadMemory(addr, 1);
         }
         return false;
     }
 
 #endif
-    template <bool track>
+    template <bool track, bool is_read>
     void UpdatePageWatchers(VAddr addr, u64 size) {
         RENDERER_TRACE;
 
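The fault handler above is where readbacks become visible at runtime: write faults keep invalidating, while read faults, previously unhandled, are forwarded to Rasterizer::ReadMemory (with both branches returning, the trailing `return false;` is now unreachable). Distinguishing the two is what Common::IsWriteError does; on x86-64 Linux the page-fault error code carries a write bit, roughly as below. This is a platform-specific sketch of that kind of check, not the project's actual helper; g++ defines _GNU_SOURCE by default, which REG_ERR requires.

    #include <ucontext.h>

    // x86-64 Linux only: bit 1 of the page-fault error code is set when the
    // faulting access was a write.
    bool IsWriteFault(const void* raw_context) {
        const auto* ctx = static_cast<const ucontext_t*>(raw_context);
        return (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
    }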
@@ -200,7 +228,7 @@ struct PageManager::Impl {
         const auto lock_end = locks.begin() + Common::DivCeil(page_end, PAGES_PER_LOCK);
         Common::RangeLockGuard lk(lock_start, lock_end);
 
-        auto perms = cached_pages[page].Perm();
+        auto perms = cached_pages[page].Perms();
         u64 range_begin = 0;
         u64 range_bytes = 0;
         u64 potential_range_bytes = 0;
@@ -226,9 +254,9 @@ struct PageManager::Impl {
             PageState& state = cached_pages[page];
 
             // Apply the change to the page state
-            const u8 new_count = state.AddDelta<track ? 1 : -1>();
+            const u8 new_count = state.AddDelta<track ? 1 : -1, is_read>();
 
-            if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
+            if (auto new_perms = state.Perms(); new_perms != perms) [[unlikely]] {
                 // If the protection changed add pending (un)protect action
                 release_pending();
                 perms = new_perms;
@@ -253,25 +281,23 @@ struct PageManager::Impl {
         release_pending();
     }
 
-    template <bool track>
+    template <bool track, bool is_read>
     void UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) {
         RENDERER_TRACE;
         auto start_range = mask.FirstRange();
         auto end_range = mask.LastRange();
 
         if (start_range.second == end_range.second) {
-            // Optimization: if all pages are contiguous, use the regular UpdatePageWatchers
+            // if all pages are contiguous, use the regular UpdatePageWatchers
             const VAddr start_addr = base_addr + (start_range.first << PAGE_BITS);
             const u64 size = (start_range.second - start_range.first) << PAGE_BITS;
-            UpdatePageWatchers<track>(start_addr, size);
-            return;
+            return UpdatePageWatchers<track, is_read>(start_addr, size);
         }
 
         size_t base_page = (base_addr >> PAGE_BITS);
         ASSERT(base_page % PAGES_PER_LOCK == 0);
         std::scoped_lock lk(locks[base_page / PAGES_PER_LOCK]);
-        auto perms = cached_pages[base_page + start_range.first].Perm();
+        auto perms = cached_pages[base_page + start_range.first].Perms();
         u64 range_begin = 0;
         u64 range_bytes = 0;
         u64 potential_range_bytes = 0;
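UpdatePageWatchersForRegion first checks whether the mask is one contiguous run (FirstRange and LastRange ending on the same page) and, if so, falls back to the cheaper linear UpdatePageWatchers; the new `return UpdatePageWatchers<...>(...)` is the same early-out, just expressed as returning a void call. The single-run test can be sketched like this (assumed semantics for the FirstRange/LastRange check):

    #include <bitset>
    #include <cstddef>
    #include <optional>
    #include <utility>

    // Returns the single contiguous [start, end) run of set bits, or nullopt
    // if the mask is empty or fragmented.
    template <std::size_t N>
    std::optional<std::pair<std::size_t, std::size_t>> SingleRun(const std::bitset<N>& mask) {
        if (mask.none()) {
            return std::nullopt;
        }
        std::size_t first = 0;
        while (!mask.test(first)) {
            ++first;
        }
        std::size_t last = N;
        while (!mask.test(last - 1)) {
            --last;
        }
        for (std::size_t i = first; i < last; ++i) {
            if (!mask.test(i)) {
                return std::nullopt; // hole: must take the per-page slow path
            }
        }
        return std::make_pair(first, last);
    }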
@@ -292,9 +318,10 @@ struct PageManager::Impl {
             const bool update = mask.Get(page);
 
             // Apply the change to the page state
-            const u8 new_count = update ? state.AddDelta<track ? 1 : -1>() : state.AddDelta<0>();
+            const u8 new_count =
+                update ? state.AddDelta<track ? 1 : -1, is_read>() : state.AddDelta<0, is_read>();
 
-            if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
+            if (auto new_perms = state.Perms(); new_perms != perms) [[unlikely]] {
                 // If the protection changed add pending (un)protect action
                 release_pending();
                 perms = new_perms;
@@ -348,19 +375,23 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
 
 template <bool track>
 void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
-    impl->UpdatePageWatchers<track>(addr, size);
+    impl->UpdatePageWatchers<track, false>(addr, size);
 }
 
-template <bool track>
+template <bool track, bool is_read>
 void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) const {
-    impl->UpdatePageWatchersForRegion<track>(base_addr, mask);
+    impl->UpdatePageWatchersForRegion<track, is_read>(base_addr, mask);
 }
 
 template void PageManager::UpdatePageWatchers<true>(VAddr addr, u64 size) const;
 template void PageManager::UpdatePageWatchers<false>(VAddr addr, u64 size) const;
-template void PageManager::UpdatePageWatchersForRegion<true>(VAddr base_addr,
-                                                             RegionBits& mask) const;
-template void PageManager::UpdatePageWatchersForRegion<false>(VAddr base_addr,
-                                                              RegionBits& mask) const;
+template void PageManager::UpdatePageWatchersForRegion<true, true>(VAddr base_addr,
+                                                                   RegionBits& mask) const;
+template void PageManager::UpdatePageWatchersForRegion<true, false>(VAddr base_addr,
+                                                                    RegionBits& mask) const;
+template void PageManager::UpdatePageWatchersForRegion<false, true>(VAddr base_addr,
+                                                                    RegionBits& mask) const;
+template void PageManager::UpdatePageWatchersForRegion<false, false>(VAddr base_addr,
+                                                                     RegionBits& mask) const;
 
 } // namespace VideoCore
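Because the template bodies live in the .cpp file, every <track, is_read> combination used anywhere else must be explicitly instantiated there, which is why the two old instantiations become four. The mechanism in miniature (single-file sketch; in the real code the declaration sits in the header and the definition in the implementation file):

    // header (sketch): declaration only, body not visible to other TUs
    template <bool track, bool is_read>
    void Update(int page);

    // implementation file (sketch): definition plus explicit instantiations
    template <bool track, bool is_read>
    void Update(int page) {
        // ... page-watcher bookkeeping would go here ...
        (void)page;
    }

    // Without one line per used combination, callers in other translation
    // units fail to link with undefined-symbol errors.
    template void Update<true, true>(int);
    template void Update<true, false>(int);
    template void Update<false, true>(int);
    template void Update<false, false>(int);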
@@ -37,9 +37,8 @@ public:
     template <bool track>
     void UpdatePageWatchers(VAddr addr, u64 size) const;
 
-    /// Updates watches in the pages touching the specified region
-    /// using a mask.
-    template <bool track>
+    /// Updates watches in the pages touching the specified region using a mask.
+    template <bool track, bool is_read = false>
     void UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) const;
 
     /// Returns page aligned address.
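Note the `is_read = false` default in the header: existing call sites that pass only <track> keep compiling unchanged, and only readback-aware paths spell out the second argument. In miniature:

    template <bool track, bool is_read = false>
    void UpdatePageWatchersForRegion() {
        // body elided; the default keeps pre-readback callers source-compatible
    }

    int main() {
        UpdatePageWatchersForRegion<true>();       // old-style call: is_read = false
        UpdatePageWatchersForRegion<true, true>(); // new readback-aware call
    }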
@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
-      buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
+      buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
       texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
     if (!Config::nullGpu()) {
@@ -945,6 +945,10 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo
     buffer_cache.InlineData(address, value, num_bytes, is_gds);
 }
 
+void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
+    buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds);
+}
+
 u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
     auto* gds_buf = buffer_cache.GetGdsBuffer();
     u32 value;
@@ -957,11 +961,20 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
         // Not GPU mapped memory, can skip invalidation logic entirely.
         return false;
     }
-    buffer_cache.InvalidateMemory(addr, size, false);
+    buffer_cache.InvalidateMemory(addr, size);
     texture_cache.InvalidateMemory(addr, size);
     return true;
 }
 
+bool Rasterizer::ReadMemory(VAddr addr, u64 size) {
+    if (!IsMapped(addr, size)) {
+        // Not GPU mapped memory, can skip invalidation logic entirely.
+        return false;
+    }
+    buffer_cache.ReadMemory(addr, size);
+    return true;
+}
+
 bool Rasterizer::IsMapped(VAddr addr, u64 size) {
     if (size == 0) {
         // There is no memory, so not mapped.
@@ -982,7 +995,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
 }
 
 void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
-    buffer_cache.InvalidateMemory(addr, size, true);
+    buffer_cache.InvalidateMemory(addr, size);
     texture_cache.UnmapMemory(addr, size);
     page_manager.OnGpuUnmap(addr, size);
     {
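Taken together with the page-manager change, the new Rasterizer::ReadMemory completes the readback loop: GPU writes leave pages read-protected, the first CPU read faults, and the handler downloads the GPU copy via buffer_cache.ReadMemory before the access is retried. A toy model of the routing (names mirror the diff, bodies are stand-ins):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    using VAddr = std::uintptr_t;
    using u64 = std::uint64_t;

    // Stand-ins for the cache calls made by the real Rasterizer methods.
    bool InvalidateMemory(VAddr addr, u64 size) {
        std::printf("invalidate %#zx (+%llu)\n", static_cast<std::size_t>(addr),
                    static_cast<unsigned long long>(size));
        return true;
    }
    bool ReadMemory(VAddr addr, u64 size) {
        std::printf("readback %#zx (+%llu)\n", static_cast<std::size_t>(addr),
                    static_cast<unsigned long long>(size));
        return true;
    }

    // The routing added by this commit: write faults invalidate stale GPU
    // copies; read faults pull GPU-written data back to the host.
    bool OnPageFault(VAddr addr, bool is_write) {
        return is_write ? InvalidateMemory(addr, 1) : ReadMemory(addr, 1);
    }

    int main() {
        OnPageFault(0x1000, true);  // CPU wrote a tracked page
        OnPageFault(0x2000, false); // CPU read a GPU-dirty page (readbacks on)
    }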
@@ -56,8 +56,10 @@ public:
                         bool from_guest = false);
 
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
+    void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
     u32 ReadDataFromGds(u32 gsd_offset);
     bool InvalidateMemory(VAddr addr, u64 size);
+    bool ReadMemory(VAddr addr, u64 size);
     bool IsMapped(VAddr addr, u64 size);
     void MapMemory(VAddr addr, u64 size);
     void UnmapMemory(VAddr addr, u64 size);