video_core: Move command buffer loop.
This moves the hot loop into video_core. This refactoring shall reduce the CPU overhead of calling ProcessCommandList.
This commit is contained in:
parent
c560043581
commit
0cfb0bacb2
5 changed files with 86 additions and 79 deletions
|
@ -8,6 +8,7 @@
|
|||
#include "core/core.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/command_processor.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
|
@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
|
|||
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
||||
params.address, params.num_entries, params.flags);
|
||||
|
||||
ASSERT_MSG(input.size() ==
|
||||
sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry),
|
||||
ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader),
|
||||
"Incorrect input size");
|
||||
|
||||
std::vector<IoctlGpfifoEntry> entries(params.num_entries);
|
||||
std::vector<Tegra::CommandListHeader> entries(params.num_entries);
|
||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||
params.num_entries * sizeof(IoctlGpfifoEntry));
|
||||
for (auto entry : entries) {
|
||||
Tegra::GPUVAddr va_addr = entry.Address();
|
||||
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
|
||||
}
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||
|
||||
Core::System::GetInstance().GPU().ProcessCommandLists(entries);
|
||||
|
||||
params.fence_out.id = 0;
|
||||
params.fence_out.value = 0;
|
||||
std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmitGpfifo));
|
||||
|
@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
|
|||
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
||||
params.address, params.num_entries, params.flags);
|
||||
|
||||
std::vector<IoctlGpfifoEntry> entries(params.num_entries);
|
||||
std::vector<Tegra::CommandListHeader> entries(params.num_entries);
|
||||
Memory::ReadBlock(params.address, entries.data(),
|
||||
params.num_entries * sizeof(IoctlGpfifoEntry));
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||
|
||||
Core::System::GetInstance().GPU().ProcessCommandLists(entries);
|
||||
|
||||
for (auto entry : entries) {
|
||||
Tegra::GPUVAddr va_addr = entry.Address();
|
||||
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
|
||||
}
|
||||
params.fence_out.id = 0;
|
||||
params.fence_out.value = 0;
|
||||
std::memcpy(output.data(), ¶ms, output.size());
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#include "common/common_types.h"
|
||||
#include "common/swap.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Service::Nvidia::Devices {
|
||||
|
||||
|
@ -151,22 +150,6 @@ private:
|
|||
};
|
||||
static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
|
||||
|
||||
struct IoctlGpfifoEntry {
|
||||
u32_le entry0; // gpu_va_lo
|
||||
union {
|
||||
u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
|
||||
BitField<0, 8, u32_le> gpu_va_hi;
|
||||
BitField<8, 2, u32_le> unk1;
|
||||
BitField<10, 21, u32_le> sz;
|
||||
BitField<31, 1, u32_le> unk2;
|
||||
};
|
||||
|
||||
Tegra::GPUVAddr Address() const {
|
||||
return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
|
||||
|
||||
struct IoctlSubmitGpfifo {
|
||||
u64_le address; // pointer to gpfifo entry structs
|
||||
u32_le num_entries; // number of fence objects being submitted
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue