diff --git a/CMakeLists.txt b/CMakeLists.txt
index 466933608..38532760d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -689,6 +689,7 @@ set(COMMON src/common/logging/backend.cpp
           src/common/recursive_lock.cpp
           src/common/recursive_lock.h
           src/common/sha1.h
+          src/common/shared_first_mutex.h
           src/common/signal_context.h
           src/common/signal_context.cpp
           src/common/singleton.h
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 462dd05c7..eded244d6 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -65,6 +65,7 @@ static u32 screenHeight = 720;
 static bool isNullGpu = false;
 static bool shouldCopyGPUBuffers = false;
 static bool readbacksEnabled = false;
+static bool directMemoryAccessEnabled = false;
 static bool shouldDumpShaders = false;
 static bool shouldPatchShaders = false;
 static u32 vblankDivider = 1;
@@ -102,7 +103,7 @@ u32 m_language = 1; // english
 static std::string trophyKey = "";
 // Expected number of items in the config file
-static constexpr u64 total_entries = 50;
+static constexpr u64 total_entries = 51;
 bool allowHDR() {
     return isHDRAllowed;
 }
@@ -261,6 +262,10 @@ bool readbacks() {
     return readbacksEnabled;
 }
+bool directMemoryAccess() {
+    return directMemoryAccessEnabled;
+}
+
 bool dumpShaders() {
     return shouldDumpShaders;
 }
@@ -369,6 +374,10 @@ void setReadbacks(bool enable) {
     readbacksEnabled = enable;
 }
+void setDirectMemoryAccess(bool enable) {
+    directMemoryAccessEnabled = enable;
+}
+
 void setDumpShaders(bool enable) {
     shouldDumpShaders = enable;
 }
@@ -622,6 +631,7 @@ void load(const std::filesystem::path& path) {
     isNullGpu = toml::find_or(gpu, "nullGpu", isNullGpu);
     shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", shouldCopyGPUBuffers);
     readbacksEnabled = toml::find_or(gpu, "readbacks", readbacksEnabled);
+    directMemoryAccessEnabled = toml::find_or(gpu, "directMemoryAccess", directMemoryAccessEnabled);
     shouldDumpShaders = toml::find_or(gpu, "dumpShaders", shouldDumpShaders);
     shouldPatchShaders = toml::find_or(gpu, "patchShaders", shouldPatchShaders);
     vblankDivider = toml::find_or(gpu, "vblankDivider", vblankDivider);
@@ -791,6 +801,7 @@ void save(const std::filesystem::path& path) {
     data["GPU"]["nullGpu"] = isNullGpu;
     data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
     data["GPU"]["readbacks"] = readbacksEnabled;
+    data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled;
     data["GPU"]["dumpShaders"] = shouldDumpShaders;
     data["GPU"]["patchShaders"] = shouldPatchShaders;
     data["GPU"]["vblankDivider"] = vblankDivider;
@@ -890,6 +901,7 @@ void setDefaultValues() {
     isNullGpu = false;
     shouldCopyGPUBuffers = false;
     readbacksEnabled = false;
+    directMemoryAccessEnabled = false;
     shouldDumpShaders = false;
     shouldPatchShaders = false;
     vblankDivider = 1;
diff --git a/src/common/config.h b/src/common/config.h
index 219461e7e..931fa68e2 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -47,6 +47,8 @@ bool copyGPUCmdBuffers();
 void setCopyGPUCmdBuffers(bool enable);
 bool readbacks();
 void setReadbacks(bool enable);
+bool directMemoryAccess();
+void setDirectMemoryAccess(bool enable);
 bool dumpShaders();
 void setDumpShaders(bool enable);
 u32 vblankDiv();
diff --git a/src/common/shared_first_mutex.h b/src/common/shared_first_mutex.h
new file mode 100644
index 000000000..b150c956b
--- /dev/null
+++ b/src/common/shared_first_mutex.h
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+namespace Common {
+
+// Like std::shared_mutex, but readers have priority over writers.
+class SharedFirstMutex {
+public:
+    void lock() {
+        std::unique_lock lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active && readers == 0; });
+        writer_active = true;
+    }
+
+    void unlock() {
+        std::lock_guard lock(mtx);
+        writer_active = false;
+        cv.notify_all();
+    }
+
+    void lock_shared() {
+        std::unique_lock lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active; });
+        ++readers;
+    }
+
+    void unlock_shared() {
+        std::lock_guard lock(mtx);
+        if (--readers == 0) {
+            cv.notify_all();
+        }
+    }
+
+private:
+    std::mutex mtx;
+    std::condition_variable cv;
+    int readers = 0;
+    bool writer_active = false;
+};
+
+} // namespace Common
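SharedFirstMutex exposes the standard lock()/unlock()/lock_shared()/unlock_shared() interface, so it composes with the usual standard-library lock adaptors. A minimal usage sketch (hypothetical functions, not part of this patch):

#include <mutex>        // std::unique_lock
#include <shared_mutex> // std::shared_lock
#include "common/shared_first_mutex.h"

Common::SharedFirstMutex mtx;

void ReadSide() {
    std::shared_lock lock(mtx); // admitted even while a writer is waiting
    // ... read shared state ...
}

void WriteSide() {
    std::unique_lock lock(mtx); // waits until no reader or writer holds the mutex
    // ... mutate shared state ...
}

The cost of reader priority is that a steady stream of readers can starve a writer; that appears acceptable for its use in the rasterizer at the end of this patch, where shared acquisitions on the hot draw path vastly outnumber exclusive ones.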
diff --git a/src/emulator.cpp b/src/emulator.cpp
index d6d523fa0..fbab5929b 100644
--- a/src/emulator.cpp
+++ b/src/emulator.cpp
@@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
     LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
     LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
     LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
+    LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
     LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
     LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
     LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 85e93f3fb..e37acb2e4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -200,10 +200,18 @@ Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
 }
 
+Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
+}
+
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
 }
 
+Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
+}
+
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
         return BufferAtomicU32(ctx, inst, handle, address, value,
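Both new emitters delegate to the BufferAtomicU64 helper this file already uses for the IAdd64 and SMax64/UMax64 cases, selecting the SPIR-V instruction via a Sirit member-function pointer. For intuition about the semantics being emitted, a host-side analog of a 64-bit unsigned atomic minimum (illustration only; the GPU performs this on buffer memory):

#include <algorithm>
#include <atomic>
#include <cstdint>

// Returns the previous value, as OpAtomicUMin does; the SMin64 variant differs
// only in interpreting the same 64 bits as a signed int64_t for the comparison.
uint64_t AtomicUMin64(std::atomic<uint64_t>& word, uint64_t value) {
    uint64_t old = word.load();
    while (!word.compare_exchange_weak(old, std::min(old, value))) {
        // `old` was refreshed with the current contents; retry the exchange.
    }
    return old;
}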
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 564fb3f80..f3a8c518c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "common/assert.h"
+#include "common/config.h"
 #include "common/logging/log.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -167,6 +168,9 @@ using PointerSize = EmitContext::PointerSize;
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
+    if (!Config::directMemoryAccess()) {
+        return ctx.EmitFlatbufferLoad(ctx.ConstU32(flatbuf_off_dw));
+    }
     // We can only provide a fallback for immediate offsets.
     if (flatbuf_off_dw == 0) {
         return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
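With directMemoryAccess disabled, every ReadConst whose flat-buffer offset was resolved at recompile time is now served straight from the Flatbuf constant buffer, and only the DMA-enabled path still reaches the read_const_dynamic helper. Based on the EmitFlatbufferLoad helper added to spirv_emit_context.h further down in this patch, the fallback for an immediate offset of, say, 12 dwords reduces to the following sketch (member names are those of EmitContext; illustration only):

// Access chain into the Flatbuf binding at the immediate dword offset,
// followed by a single 32-bit load. flatbuf_buffer_id and flatbuf_pointer_type
// come from the Flatbuf buffer's U32 alias.
const Id ptr = ctx.OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id,
                                 ctx.u32_zero_value, ctx.ConstU32(12u));
const Id value = ctx.OpLoad(ctx.U32[1], ptr);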
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 15a8fd99b..1ac2266bd 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -91,7 +91,9 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -406,14 +408,20 @@ Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 1a995354d..ddc1e7574 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -371,19 +371,35 @@ Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
 }
 
@@ -395,11 +411,19 @@ Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpINotEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
 }
 
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 524914ad4..77336c9ec 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -784,19 +784,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };
 
 void EmitContext::DefineBuffers() {
-    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
-        // In case Flatbuf has not already been bound by IR and is needed
-        // to query buffer sizes, bind it now.
-        info.buffers.push_back({
-            .used_types = IR::Type::U32,
-            // We can't guarantee that flatbuf will not grow past UBO
-            // limit if there are a lot of ReadConsts. (We could specialize)
-            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
-            .buffer_type = BufferType::Flatbuf,
-        });
-        // In the future we may want to read buffer sizes from GPU memory if available.
-        // info.readconst_types |= Info::ReadConstType::Immediate;
-    }
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
@@ -1219,14 +1206,7 @@ Id EmitContext::DefineReadConst(bool dynamic) {
         if (dynamic) {
             return u32_zero_value;
         } else {
-            const auto& flatbuf_buffer{buffers[flatbuf_index]};
-            ASSERT(flatbuf_buffer.binding >= 0 &&
-                   flatbuf_buffer.buffer_type == BufferType::Flatbuf);
-            const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
-                flatbuf_buffer.Alias(PointerType::U32);
-            const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
-                                         flatbuf_offset)};
-            return OpLoad(U32[1], ptr);
+            return EmitFlatbufferLoad(flatbuf_offset);
         }
     });
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index f8c6416e8..28e9099d8 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -180,6 +180,16 @@ public:
         return OpAccessChain(result_type, shared_mem, index);
     }
 
+    Id EmitFlatbufferLoad(Id flatbuf_offset) {
+        const auto& flatbuf_buffer{buffers[flatbuf_index]};
+        ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
+        const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
+            flatbuf_buffer.aliases[u32(PointerType::U32)];
+        const auto ptr{
+            OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, flatbuf_offset)};
+        return OpLoad(U32[1], ptr);
+    }
+
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index ece334bcd..b5bfec344 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -20,7 +20,7 @@ namespace Shader::Gcn {
 enum class ConditionOp : u32 {
     F,
     EQ,
-    LG,
+    LG, // NE
     GT,
     GE,
     LT,
@@ -230,7 +230,7 @@ public:
     // VOPC
     void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
     void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
-    void V_CMP_NE_U64(const GcnInst& inst);
+    void V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
     void V_CMP_CLASS_F32(const GcnInst& inst);
 
     // VOP3a
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 3b88e4dec..54f1088f2 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -327,8 +327,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_CMP_U32(ConditionOp::TRU, false, true, inst);
 
     // V_CMP_{OP8}_U64
+    case Opcode::V_CMP_EQ_U64:
+        return V_CMP_U64(ConditionOp::EQ, false, false, inst);
     case Opcode::V_CMP_NE_U64:
-        return V_CMP_NE_U64(inst);
+        return V_CMP_U64(ConditionOp::LG, false, false, inst);
 
     case Opcode::V_CMP_CLASS_F32:
         return V_CMP_CLASS_F32(inst);
@@ -556,27 +558,31 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
 
 void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
     if (!is_low) {
-        // v_mbcnt_hi_u32_b32 v2, -1, 0
+        // v_mbcnt_hi_u32_b32 vX, -1, 0
         if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
             inst.src[1].field == OperandField::ConstZero) {
             return;
         }
-        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0
-        if (inst.src[0].field == OperandField::ExecHi &&
-            inst.src[1].field == OperandField::ConstZero) {
-            return;
+        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
+        if ((inst.src[0].field == OperandField::ExecHi ||
+             inst.src[0].field == OperandField::VccHi) &&
+            (inst.src[1].field == OperandField::ConstZero ||
+             inst.src[1].field == OperandField::VectorGPR)) {
+            return SetDst(inst.dst[0], GetSrc(inst.src[1]));
         }
+        UNREACHABLE();
     } else {
-        // v_mbcnt_lo_u32_b32 v2, -1, vX
+        // v_mbcnt_lo_u32_b32 vY, -1, vX
         // used combined with above to fetch lane id in non-compute stages
         if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
-            SetDst(inst.dst[0], ir.LaneId());
+            return SetDst(inst.dst[0], ir.LaneId());
        }
-        // v_mbcnt_lo_u32_b32 v20, exec_lo, vX
-        // used combined in above for append buffer indexing.
-        if (inst.src[0].field == OperandField::ExecLo) {
-            SetDst(inst.dst[0], ir.Imm32(0));
+        // v_mbcnt_lo_u32_b32 vY, exec_lo, vX
+        // used combined with above for append buffer indexing.
+        if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
+            return SetDst(inst.dst[0], GetSrc(inst.src[1]));
         }
+        UNREACHABLE();
     }
 }
@@ -996,39 +1002,32 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const
     }
 }
 
-void Translator::V_CMP_NE_U64(const GcnInst& inst) {
-    const auto get_src = [&](const InstOperand& operand) {
-        switch (operand.field) {
-        case OperandField::VccLo:
-            return ir.GetVcc();
-        case OperandField::ExecLo:
-            return ir.GetExec();
-        case OperandField::ScalarGPR:
-            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
-        case OperandField::ConstZero:
-            return ir.Imm1(false);
+void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
+    const IR::U64 src0{GetSrc64(inst.src[0])};
+    const IR::U64 src1{GetSrc64(inst.src[1])};
+    const IR::U1 result = [&] {
+        switch (op) {
+        case ConditionOp::EQ:
+            return ir.IEqual(src0, src1);
+        case ConditionOp::LG: // NE
+            return ir.INotEqual(src0, src1);
         default:
-            UNREACHABLE();
+            UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op));
         }
-    };
-    const IR::U1 src0{get_src(inst.src[0])};
-    auto op = [&inst, this](auto x) {
-        switch (inst.src[1].field) {
-        case OperandField::ConstZero:
-            return x;
-        case OperandField::SignedConstIntNeg:
-            return ir.LogicalNot(x);
-        default:
-            UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field));
-        }
-    };
+    }();
+
+    if (is_signed) {
+        UNREACHABLE_MSG("V_CMP_U64 with signed integers is not supported");
+    }
+    if (set_exec) {
+        UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported");
+    }
+
     switch (inst.dst[1].field) {
     case OperandField::VccLo:
-        ir.SetVcc(op(src0));
-        break;
+        return ir.SetVcc(result);
     case OperandField::ScalarGPR:
-        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0));
-        break;
+        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
     default:
         UNREACHABLE();
     }
 }
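The folded patterns are a GCN idiom: v_mbcnt_lo_u32_b32 counts the set bits of the low half of a 64-bit mask below the current lane and adds an accumulator, and v_mbcnt_hi_u32_b32 does the same for the high half, so chaining the pair over an all-ones mask yields the lane index, which is why the translator lowers that pattern to ir.LaneId(). A host-side sketch of the combined computation (illustration only; assumes lane < 64):

#include <bit>
#include <cstdint>

// Set bits of `mask` strictly below `lane`. With mask == ~0ull the result is
// exactly `lane`, i.e. the thread's position within the 64-wide wavefront.
uint32_t Mbcnt64(uint64_t mask, uint32_t lane) {
    const uint64_t below = lane ? (mask & ((uint64_t{1} << lane) - 1)) : 0;
    return static_cast<uint32_t>(std::popcount(below));
}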
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 8dcf70a07..91f545cfd 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -74,8 +74,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
     case Opcode::BUFFER_ATOMIC_SMIN:
         return BUFFER_ATOMIC(AtomicOp::Smin, inst);
+    case Opcode::BUFFER_ATOMIC_SMIN_X2:
+        return BUFFER_ATOMIC(AtomicOp::Smin, inst);
     case Opcode::BUFFER_ATOMIC_UMIN:
         return BUFFER_ATOMIC(AtomicOp::Umin, inst);
+    case Opcode::BUFFER_ATOMIC_UMIN_X2:
+        return BUFFER_ATOMIC(AtomicOp::Umin, inst);
     case Opcode::BUFFER_ATOMIC_SMAX:
         return BUFFER_ATOMIC(AtomicOp::Smax, inst);
     case Opcode::BUFFER_ATOMIC_SMAX_X2:
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 2497864c0..3d64cc5da 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -500,8 +500,16 @@ Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, con
 
 Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
                                   bool is_signed, BufferInstInfo info) {
-    return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
-                     : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
+    switch (value.Type()) {
+    case Type::U32:
+        return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
+    case Type::U64:
+        return is_signed ? Inst(Opcode::BufferAtomicSMin64, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMin64, Flags{info}, handle, address, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
@@ -1712,12 +1720,32 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
     }
 }
 
-U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+U1 IREmitter::ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SLessThanEqual32 : Opcode::ULessThanEqual32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SLessThanEqual64 : Opcode::ULessThanEqual64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
-U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+U1 IREmitter::IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThan32 : Opcode::UGreaterThan32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThan64 : Opcode::UGreaterThan64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
@@ -1734,8 +1762,20 @@ U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
     }
 }
 
-U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+U1 IREmitter::IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual32 : Opcode::UGreaterThanEqual32, lhs,
+                        rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual64 : Opcode::UGreaterThanEqual64, lhs,
+                        rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 9e2f79978..119e3752e 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -299,10 +299,10 @@ public:
     [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
-    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
-    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 INotEqual(const U32U64& lhs, const U32U64& rhs);
-    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index 8d46a0071..84bdb5739 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -70,7 +70,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicIAdd64:
     case Opcode::BufferAtomicISub32:
     case Opcode::BufferAtomicSMin32:
+    case Opcode::BufferAtomicSMin64:
     case Opcode::BufferAtomicUMin32:
+    case Opcode::BufferAtomicUMin64:
     case Opcode::BufferAtomicFMin32:
     case Opcode::BufferAtomicSMax32:
     case Opcode::BufferAtomicSMax64:
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 7fc514de9..008f44659 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -124,7 +124,9 @@ OPCODE(BufferAtomicIAdd32, U32, Opaq
 OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicSMin64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicUMin64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 )
@@ -382,14 +384,20 @@ OPCODE(ULessThan32, U1, U32,
 OPCODE(ULessThan64, U1, U64, U64, )
 OPCODE(IEqual32, U1, U32, U32, )
 OPCODE(IEqual64, U1, U64, U64, )
-OPCODE(SLessThanEqual, U1, U32, U32, )
-OPCODE(ULessThanEqual, U1, U32, U32, )
-OPCODE(SGreaterThan, U1, U32, U32, )
-OPCODE(UGreaterThan, U1, U32, U32, )
+OPCODE(SLessThanEqual32, U1, U32, U32, )
+OPCODE(SLessThanEqual64, U1, U64, U64, )
+OPCODE(ULessThanEqual32, U1, U32, U32, )
+OPCODE(ULessThanEqual64, U1, U64, U64, )
+OPCODE(SGreaterThan32, U1, U32, U32, )
+OPCODE(SGreaterThan64, U1, U64, U64, )
+OPCODE(UGreaterThan32, U1, U32, U32, )
+OPCODE(UGreaterThan64, U1, U64, U64, )
 OPCODE(INotEqual32, U1, U32, U32, )
 OPCODE(INotEqual64, U1, U64, U64, )
-OPCODE(SGreaterThanEqual, U1, U32, U32, )
-OPCODE(UGreaterThanEqual, U1, U32, U32, )
+OPCODE(SGreaterThanEqual32, U1, U32, U32, )
+OPCODE(SGreaterThanEqual64, U1, U64, U64, )
+OPCODE(UGreaterThanEqual32, U1, U32, U32, )
+OPCODE(UGreaterThanEqual64, U1, U64, U64, )
 
 // Logical operations
 OPCODE(LogicalOr, U1, U1, U1, )
diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
index 5c66b1115..2a39d3a2e 100644
--- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
@@ -381,24 +381,42 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::ULessThan64:
         FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; });
         return;
-    case IR::Opcode::SLessThanEqual:
+    case IR::Opcode::SLessThanEqual32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
         return;
-    case IR::Opcode::ULessThanEqual:
+    case IR::Opcode::SLessThanEqual64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a <= b; });
+        return;
+    case IR::Opcode::ULessThanEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
         return;
-    case IR::Opcode::SGreaterThan:
+    case IR::Opcode::ULessThanEqual64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a <= b; });
+        return;
+    case IR::Opcode::SGreaterThan32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
         return;
-    case IR::Opcode::UGreaterThan:
+    case IR::Opcode::SGreaterThan64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a > b; });
+        return;
+    case IR::Opcode::UGreaterThan32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
         return;
-    case IR::Opcode::SGreaterThanEqual:
+    case IR::Opcode::UGreaterThan64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a > b; });
+        return;
+    case IR::Opcode::SGreaterThanEqual32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
         return;
-    case IR::Opcode::UGreaterThanEqual:
+    case IR::Opcode::SGreaterThanEqual64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a >= b; });
+        return;
+    case IR::Opcode::UGreaterThanEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
         return;
+    case IR::Opcode::UGreaterThanEqual64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a >= b; });
+        return;
     case IR::Opcode::IEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
         return;
diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h
index 57d36f6df..fdae9d3cf 100644
--- a/src/shader_recompiler/ir/passes/ir_passes.h
+++ b/src/shader_recompiler/ir/passes/ir_passes.h
@@ -19,7 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program);
 void FlattenExtendedUserdataPass(IR::Program& program);
 void ReadLaneEliminationPass(IR::Program& program);
 void ResourceTrackingPass(IR::Program& program);
-void CollectShaderInfoPass(IR::Program& program);
+void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
 void LowerBufferFormatToRaw(IR::Program& program);
 void LowerFp64ToFp32(IR::Program& program);
 void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index ffb785584..d5d140c93 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -20,7 +20,9 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicISub32:
     case IR::Opcode::BufferAtomicSMin32:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMin32:
+    case IR::Opcode::BufferAtomicUMin64:
     case IR::Opcode::BufferAtomicFMin32:
     case IR::Opcode::BufferAtomicSMax32:
     case IR::Opcode::BufferAtomicSMax64:
@@ -97,6 +99,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     case IR::Opcode::LoadBufferU64:
     case IR::Opcode::StoreBufferU64:
     case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
+    case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         return IR::Type::U64;
     case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::StoreBufferFormatF32:
@@ -118,6 +124,10 @@ u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
     case IR::Opcode::LoadBufferU64:
     case IR::Opcode::StoreBufferU64:
     case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
+    case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         return 3;
     case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::StoreBufferFormatF32: {
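The tracking pass now treats the 64-bit min/max atomics as 8-byte accesses (BufferDataType returns U64 and the address shift is 3), and the shader-info pass that follows flags uses_buffer_int64_atomics for them. That flag presumably maps to the Vulkan shaderBufferInt64Atomics feature, which backs the SPIR-V Int64Atomics capability these opcodes need. For orientation, the device-side capability query looks like this (illustration, not part of the patch):

#include <vulkan/vulkan.h>

// True if the device can back BufferAtomic*64 opcodes with native atomics.
bool SupportsBufferInt64Atomics(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceShaderAtomicInt64Features atomic_int64{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES,
    };
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &atomic_int64,
    };
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    return atomic_int64.shaderBufferInt64Atomics == VK_TRUE;
}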
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 59668870b..a87dceb0a 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/config.h"
 #include "shader_recompiler/ir/program.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 
@@ -102,7 +103,9 @@ void Visit(Info& info, const IR::Inst& inst) {
         break;
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         info.uses_buffer_int64_atomics = true;
         break;
     case IR::Opcode::LaneId:
@@ -136,7 +139,7 @@ void Visit(Info& info, const IR::Inst& inst) {
     }
 }
 
-void CollectShaderInfoPass(IR::Program& program) {
+void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
     auto& info = program.info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
             Visit(info, inst);
         }
     }
 
+    // In case Flatbuf has not already been bound by IR and is needed
+    // to query buffer sizes, bind it now.
+    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            // We can't guarantee that flatbuf will not grow past UBO
+            // limit if there are a lot of ReadConsts. (We could specialize)
+            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
+            .buffer_type = BufferType::Flatbuf,
+        });
+        // In the future we may want to read buffer sizes from GPU memory if available.
+        // info.readconst_types |= Info::ReadConstType::Immediate;
+    }
+
+    if (!Config::directMemoryAccess()) {
+        info.uses_dma = false;
+        info.readconst_types = Info::ReadConstType::None;
+    }
+
     if (info.uses_dma) {
         info.buffers.push_back({
             .used_types = IR::Type::U64,
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index e17fb1c9e..2da9e7b01 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -84,7 +84,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
-    Shader::Optimization::CollectShaderInfoPass(program);
+    Shader::Optimization::CollectShaderInfoPass(program, profile);
 
     Shader::IR::DumpProgram(program, info);
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 0aad0f047..514de1743 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -471,7 +471,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         uses_dma |= stage->uses_dma;
     }
 
-    if (uses_dma && !fault_process_pending) {
+    if (uses_dma) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c570ea368..4a978746c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -5,6 +5,7 @@
 #include <shared_mutex>
 #include "common/recursive_lock.h"
+#include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -122,7 +123,7 @@ private:
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     boost::icl::interval_set<VAddr> mapped_ranges;
-    std::shared_mutex mapped_ranges_mutex;
+    Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;
     boost::container::static_vector<