nvdec syncpt incorporation

laying the groundwork for async gpu, although this does not fully implement async nvdec operations
This commit is contained in:
ameerj 2020-12-28 01:02:06 -05:00
parent bcb702fa3e
commit 2c27127d04
11 changed files with 59 additions and 37 deletions

View file

@ -11,8 +11,9 @@
namespace Service::Nvidia::Devices {
nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager)
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
nvhost_nvdec::~nvhost_nvdec() = default;
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,

View file

@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices {
class nvhost_nvdec final : public nvhost_nvdec_common {
public:
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_nvdec() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

View file

@ -11,6 +11,7 @@
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
}
} // Anonymous namespace
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}
nvhost_nvdec_common::~nvhost_nvdec_common() = default;
NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
@ -71,10 +73,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
offset = SpliceVectors(input, fences, params.fence_count, offset);
// TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
SyncptIncr syncpt_incr = syncpt_increments[i];
fences[i].id = syncpt_incr.id;
fences[i].value =
syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
}
auto& gpu = system.GPU();
for (const auto& cmd_buffer : command_buffers) {
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
@ -89,6 +95,10 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(cmdlist);
}
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
gpu.PushCommandBuffer(cmdlist);
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back
@ -98,6 +108,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
offset = WriteVectors(output, reloc_shifts, offset);
offset = WriteVectors(output, syncpt_increments, offset);
offset = WriteVectors(output, wait_checks, offset);
offset = WriteVectors(output, fences, offset);
return NvResult::Success;
}
@ -107,9 +118,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
// We found that implementing this causes deadlocks with async gpu, along with degraded
// performance. TODO: RE the nvdec async implementation
params.value = 0;
if (device_syncpoints[params.param] == 0) {
device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
}
params.value = device_syncpoints[params.param];
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return NvResult::Success;

View file

@ -10,12 +10,16 @@
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Service::Nvidia::Devices {
namespace Service::Nvidia {
class SyncpointManager;
namespace Devices {
class nvmap;
class nvhost_nvdec_common : public nvdevice {
public:
explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_nvdec_common() override;
protected:
@ -157,8 +161,10 @@ protected:
s32_le nvmap_fd{};
u32_le submit_timeout{};
std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
std::array<u32, MaxSyncPoints> device_syncpoints{};
// This is expected to be ordered, therefore we must use a map, not unordered_map
std::map<GPUVAddr, BufferMap> buffer_mappings;
};
}; // namespace Service::Nvidia::Devices
}; // namespace Devices
} // namespace Service::Nvidia

View file

@ -10,8 +10,9 @@
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager)
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
nvhost_vic::~nvhost_vic() = default;

View file

@ -7,11 +7,11 @@
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
namespace Service::Nvidia::Devices {
class nvmap;
class nvhost_vic final : public nvhost_nvdec_common {
public:
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_vic();
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

View file

@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
devices["/dev/nvhost-ctrl"] =
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
devices["/dev/nvhost-nvdec"] =
std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
devices["/dev/nvhost-vic"] =
std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
}
Module::~Module() = default;