GnmDriver: Clear context support (#567)

* gnmdriver: added support for gpu context reset

* shader_recompiler: minor validation fixes

* shader_recompiler: added `V_CMPX_GT_I32`

* shader_recompiler: fix for crash on inline sampler access

* compilation warnings and dead code elimination

* amdgpu: fix for registers addressing

* libraries: videoout: reduce logging pressure

* shader_recompiler: fix for devergence scope detection
This commit is contained in:
psucien 2024-08-25 22:01:05 +02:00 committed by GitHub
parent c2ddfe51e1
commit b687ae5e34
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 147 additions and 50 deletions

View file

@ -55,6 +55,10 @@ static constexpr auto HwInitPacketSize = 0x100u;
// clang-format off
static constexpr std::array InitSequence{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
@ -94,9 +98,13 @@ static constexpr std::array InitSequence{
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence.size() == 0x73);
static_assert(InitSequence.size() == 0x73 + 2);
static constexpr std::array InitSequence175{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
@ -136,9 +144,13 @@ static constexpr std::array InitSequence175{
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence175.size() == 0x73);
static_assert(InitSequence175.size() == 0x73 + 2);
static constexpr std::array InitSequence200{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
@ -179,9 +191,13 @@ static constexpr std::array InitSequence200{
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence200.size() == 0x76);
static_assert(InitSequence200.size() == 0x76 + 2);
static constexpr std::array InitSequence350{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
@ -224,7 +240,7 @@ static constexpr std::array InitSequence350{
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x2aau, 0xffu,
};
static_assert(InitSequence350.size() == 0x7c);
static_assert(InitSequence350.size() == 0x7c + 2);
static constexpr std::array CtxInitSequence{
0xc0012800u, 0x80000000u, 0x80000000u,
@ -735,11 +751,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
cmdbuf = ClearContextState(cmdbuf);
}
std::memcpy(cmdbuf, InitSequence.data(), InitSequence.size() * 4);
cmdbuf += InitSequence.size();
std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4);
cmdbuf += InitSequence.size() - 2;
const auto cmdbuf_left =
HwInitPacketSize - InitSequence.size() - (clear_state ? 0xc : 0) - 1;
HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1;
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
@ -757,10 +773,10 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
}
cmdbuf = ClearContextState(cmdbuf);
std::memcpy(cmdbuf, InitSequence175.data(), InitSequence175.size() * 4);
cmdbuf += InitSequence175.size();
std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4);
cmdbuf += InitSequence175.size() - 2;
constexpr auto cmdbuf_left = HwInitPacketSize - InitSequence175.size() - 0xc - 1;
constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1;
WriteTrailingNop<cmdbuf_left>(cmdbuf);
return HwInitPacketSize;
@ -778,11 +794,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
cmdbuf = ClearContextState(cmdbuf);
}
std::memcpy(cmdbuf, InitSequence200.data(), InitSequence200.size() * 4);
cmdbuf += InitSequence200.size();
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
cmdbuf += InitSequence200.size() - 2;
const auto cmdbuf_left =
HwInitPacketSize - InitSequence200.size() - (clear_state ? 0xc : 0) - 1;
HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1;
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
@ -804,11 +820,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
cmdbuf = ClearContextState(cmdbuf);
}
std::memcpy(cmdbuf, InitSequence350.data(), InitSequence350.size() * 4);
cmdbuf += InitSequence350.size();
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
cmdbuf += InitSequence350.size() - 2;
const auto cmdbuf_left =
HwInitPacketSize - InitSequence350.size() - (clear_state ? 0xc : 0) - 1;
HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1;
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
@ -1743,7 +1759,7 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3
return -1;
}
const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier);
const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f) | shader_modifier;
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL

View file

@ -185,14 +185,16 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) {
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
}
std::unique_lock lock{port->port_mutex};
*status = port->flip_status;
{
std::unique_lock lock{port->port_mutex};
*status = port->flip_status;
}
LOG_INFO(Lib_VideoOut,
"count = {}, processTime = {}, tsc = {}, submitTsc = {}, flipArg = {}, gcQueueNum = "
"{}, flipPendingNum = {}, currentBuffer = {}",
status->count, status->processTime, status->tsc, status->submitTsc, status->flipArg,
status->gcQueueNum, status->flipPendingNum, status->currentBuffer);
LOG_TRACE(Lib_VideoOut,
"count = {}, processTime = {}, tsc = {}, submitTsc = {}, flipArg = {}, gcQueueNum = "
"{}, flipPendingNum = {}, currentBuffer = {}",
status->count, status->processTime, status->tsc, status->submitTsc, status->flipArg,
status->gcQueueNum, status->flipPendingNum, status->currentBuffer);
return ORBIS_OK;
}