Initial instancing and asynchronous compute queues (#207)

* gnm_driver: added `sceGnmRegisterOwner` and `sceGnmRegisterResource`

* video_out: `sceVideoOutGetDeviceCapabilityInfo` for sdk runtime

* gnm_driver: correct vqid index range

* amdgpu: indirect buffer, release mem and some additional irq modes

* amdgpu: added ASC commands processor

* shader_recompiler: added support for fetch instance id

* amdgpu: classic bitfields for T# representation (debugging experience)

* renderer_vulkan: skip zero sized VBs from binding

* texture_cache: image upload logic moved into `Image` object

* gnm_driver: `sceGnmDingDong` implementation

* texture_cache: `Image` usage flags moved; correct VO buffer pitch
This commit is contained in:
psucien 2024-06-22 18:50:20 +02:00 committed by GitHub
parent a9cbd8287c
commit cb6b21de1f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 361 additions and 100 deletions

View file

@ -252,6 +252,7 @@ constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_QUEUE_ID = 0x80D17001;
constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_RING_BASE_ADDR = 0x80D17003;
constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_RING_SIZE = 0x80D17002;
constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR = 0x80D17004;
constexpr int ORBIS_GNM_ERROR_FAILURE = 0x8EEE00FF;
// Generic
constexpr int ORBIS_OK = 0x00000000;

View file

@ -31,6 +31,7 @@ static constexpr bool g_fair_hw_init = false;
// In case if `submitDone` is issued we need to block submissions until GPU idle
static u32 submission_lock{};
static std::mutex m_submission{};
static u64 frames_submitted{}; // frame counter
struct AscQueueInfo {
@ -211,9 +212,32 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmDingDong() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
LOG_INFO(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
if (gnm_vqid == 0) {
return;
}
std::unique_lock lock{m_submission};
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
// Suspend logic goes here
submission_lock = 0;
}
auto vqid = gnm_vqid - 1;
auto& asc_queue = asc_queues[{vqid}];
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
liverpool->SubmitAsc(vqid, {acb_ptr, acb_size >> 2u});
*asc_queue.read_addr += acb_size;
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
}
int PS4_SYSV_ABI sceGnmDingDongForWorkload() {
@ -764,10 +788,12 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
}
auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
// We need to offset index as `dingDong` assumes it to be from the range [1..64]
const auto gnm_vqid = vqid.index + 1;
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
vqid.index);
gnm_vqid);
return vqid.index;
return gnm_vqid;
}
int PS4_SYSV_ABI sceGnmMapComputeQueueWithPriority(u32 pipe_id, u32 queue_id, VAddr ring_base_addr,
@ -814,14 +840,16 @@ int PS4_SYSV_ABI sceGnmRegisterGnmLiveCallbackConfig() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmRegisterOwner() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
s32 PS4_SYSV_ABI sceGnmRegisterOwner(void* handle, const char* name) {
LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_GNM_ERROR_FAILURE; // PA Debug is always disabled in retail FW
}
int PS4_SYSV_ABI sceGnmRegisterResource() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
s32 PS4_SYSV_ABI sceGnmRegisterResource(void* res_handle, void* owner_handle, const void* addr,
size_t size, const char* name, int res_type,
u64 user_data) {
LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_GNM_ERROR_FAILURE; // PA Debug is always disabled in retail FW
}
int PS4_SYSV_ABI sceGnmRequestFlipAndSubmitDone() {

View file

@ -33,7 +33,7 @@ int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister();
int PS4_SYSV_ABI sceGnmDebugHardwareStatus();
s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id);
int PS4_SYSV_ABI sceGnmDestroyWorkloadStream();
int PS4_SYSV_ABI sceGnmDingDong();
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw);
int PS4_SYSV_ABI sceGnmDingDongForWorkload();
int PS4_SYSV_ABI sceGnmDisableMipStatsReport();
s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
@ -125,8 +125,9 @@ int PS4_SYSV_ABI sceGnmQueryResourceRegistrationUserMemoryRequirements();
int PS4_SYSV_ABI sceGnmRaiseUserExceptionEvent();
int PS4_SYSV_ABI sceGnmRegisterGdsResource();
int PS4_SYSV_ABI sceGnmRegisterGnmLiveCallbackConfig();
int PS4_SYSV_ABI sceGnmRegisterOwner();
int PS4_SYSV_ABI sceGnmRegisterResource();
s32 PS4_SYSV_ABI sceGnmRegisterOwner(void* handle, const char* name);
s32 PS4_SYSV_ABI sceGnmRegisterResource(void* res_handle, void* owner_handle, const void* addr,
size_t size, const char* name, int res_type, u64 user_data);
int PS4_SYSV_ABI sceGnmRequestFlipAndSubmitDone();
int PS4_SYSV_ABI sceGnmRequestFlipAndSubmitDoneForWorkload();
int PS4_SYSV_ABI sceGnmRequestMipStatsReportAndReset();

View file

@ -288,6 +288,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("uquVH4-Du78", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutClose);
LIB_FUNCTION("1FZBKy8HeNU", "libSceVideoOut", 1, "libSceVideoOut", 0, 0,
sceVideoOutGetVblankStatus);
LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 0, 0,
sceVideoOutGetDeviceCapabilityInfo);
// openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1
LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen);