mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-26 20:36:16 +00:00
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implemtn f32 to f16 convert * shader_recompiler: Add bit instructions * shader_recompiler: More data share instructions * shader_recompiler: Remove exec contexts, fix S_MOV_B64 * shader_recompiler: Split instruction parsing into categories * shader_recompiler: Better BFS search * shader_recompiler: Constant propagation pass for cmp_class_f32 * shader_recompiler: Partial readfirstlane implementation * shader_recompiler: Stub readlane/writelane only for non-compute * hack: Fix swizzle on RDR * Will properly fix this when merging this * clang format * address_space: Bump user area size to full * shader_recompiler: V_INTERP_MOV_F32 * Should work the same as spirv will emit flat decoration on demand * kernel: Add MAP_OP_MAP_FLEXIBLE * image_view: Attempt to apply storage swizzle on format * vk_scheduler: Barrier attachments on renderpass end * clang format * liverpool: cs state backup * shader_recompiler: More instructions and formats * vector_alu: Proper V_MBCNT_U32_B32 * shader_recompiler: Port some dark souls things * file_system: Implement sceKernelRename * more formats * clang format * resource_tracking_pass: Back to assert * translate: Tracedata * kernel: Remove tracy lock * Solves random crashes in Dark Souls * code: Review comments
This commit is contained in:
parent
ac6dc20c3b
commit
a7c9bfa5c5
66 changed files with 1349 additions and 904 deletions
|
@ -238,6 +238,18 @@ void FoldBooleanConvert(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void FoldCmpClass(IR::Inst& inst) {
|
||||
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
||||
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
|
||||
inst.ReplaceOpcode(IR::Opcode::FPIsNan32);
|
||||
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
|
||||
inst.ReplaceOpcode(IR::Opcode::FPIsInf32);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::IAdd32:
|
||||
|
@ -251,6 +263,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
return;
|
||||
case IR::Opcode::FPCmpClass32:
|
||||
FoldCmpClass(inst);
|
||||
return;
|
||||
case IR::Opcode::ShiftRightArithmetic32:
|
||||
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
|
||||
return;
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
|
@ -273,9 +272,18 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
|
|||
}
|
||||
|
||||
SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||
while (inst->GetOpcode() == IR::Opcode::Phi) {
|
||||
inst = inst->Arg(0).InstRecursive();
|
||||
}
|
||||
// Search until we find a potential sharp source.
|
||||
const auto pred0 = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData ||
|
||||
inst->GetOpcode() == IR::Opcode::ReadConst) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(inst, pred0);
|
||||
ASSERT_MSG(result, "Unable to track sharp source");
|
||||
inst = result.value();
|
||||
// If its from user data not much else to do.
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return SharpLocation{
|
||||
.sgpr_base = u32(IR::ScalarReg::Max),
|
||||
|
@ -289,14 +297,14 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
|
||||
|
||||
// Retrieve SGPR pair that holds sbase
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
||||
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst->Arg(0).ScalarReg();
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred);
|
||||
const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred);
|
||||
const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred1);
|
||||
const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred1);
|
||||
ASSERT_MSG(base0 && base1, "Nested resource loads not supported");
|
||||
|
||||
// Return retrieved location.
|
||||
|
@ -456,36 +464,26 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
|||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
std::deque<IR::Inst*> insts{&inst};
|
||||
const auto& pred = [](auto opcode) -> bool {
|
||||
return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData);
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
IR::Inst* producer{};
|
||||
while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
|
||||
for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
|
||||
const auto arg = producer->Arg(arg_idx);
|
||||
if (arg.TryInstRecursive()) {
|
||||
insts.push_back(arg.InstRecursive());
|
||||
}
|
||||
}
|
||||
insts.pop_front();
|
||||
}
|
||||
ASSERT(pred(producer->GetOpcode()));
|
||||
auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||
return std::make_pair(producer->Arg(0).InstRecursive(),
|
||||
producer->Arg(1).InstRecursive());
|
||||
}
|
||||
return std::make_pair(producer, nullptr);
|
||||
}();
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle);
|
||||
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
.dword_offset = tsharp.dword_offset,
|
||||
|
@ -496,17 +494,32 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
});
|
||||
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
if (ssharp_handle) {
|
||||
const u32 sampler_binding = [&] {
|
||||
if (!has_sampler) {
|
||||
return 0U;
|
||||
}
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
return descriptors.Add(SamplerResource{
|
||||
.sgpr_base = std::numeric_limits<u32>::max(),
|
||||
.dword_offset = 0,
|
||||
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
|
||||
});
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud);
|
||||
const u32 sampler_binding = descriptors.Add(SamplerResource{
|
||||
return descriptors.Add(SamplerResource{
|
||||
.sgpr_base = ssharp.sgpr_base,
|
||||
.dword_offset = ssharp.dword_offset,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
image_binding |= (sampler_binding << 16);
|
||||
}
|
||||
}();
|
||||
image_binding |= (sampler_binding << 16);
|
||||
|
||||
// Patch image handle
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
@ -607,7 +620,7 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
|
|
|
@ -20,11 +20,19 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::LoadSharedU8:
|
||||
case IR::Opcode::WriteSharedU8:
|
||||
info.uses_shared_u8 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedS16:
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
info.uses_shared_u16 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::ConvertF32F16:
|
||||
case IR::Opcode::BitCastF16U16:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue