Merge remote-tracking branch 'upstream/main' into config-update

Author: Stephen Miller, 2025-07-03 12:05:17 -05:00
Commit: 0059ffe7a2
25 changed files with 304 additions and 104 deletions

View file

@@ -689,6 +689,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/recursive_lock.cpp
src/common/recursive_lock.h
src/common/sha1.h
src/common/shared_first_mutex.h
src/common/signal_context.h
src/common/signal_context.cpp
src/common/singleton.h

View file

@@ -65,6 +65,7 @@ static u32 screenHeight = 720;
static bool isNullGpu = false;
static bool shouldCopyGPUBuffers = false;
static bool readbacksEnabled = false;
static bool directMemoryAccessEnabled = false;
static bool shouldDumpShaders = false;
static bool shouldPatchShaders = false;
static u32 vblankDivider = 1;
@@ -102,7 +103,7 @@ u32 m_language = 1; // english
static std::string trophyKey = "";
// Expected number of items in the config file
static constexpr u64 total_entries = 50;
static constexpr u64 total_entries = 51;
bool allowHDR() {
return isHDRAllowed;
@@ -261,6 +262,10 @@ bool readbacks() {
return readbacksEnabled;
}
bool directMemoryAccess() {
return directMemoryAccessEnabled;
}
bool dumpShaders() {
return shouldDumpShaders;
}
@@ -369,6 +374,10 @@ void setReadbacks(bool enable) {
readbacksEnabled = enable;
}
void setDirectMemoryAccess(bool enable) {
directMemoryAccessEnabled = enable;
}
void setDumpShaders(bool enable) {
shouldDumpShaders = enable;
}
@@ -622,6 +631,7 @@ void load(const std::filesystem::path& path) {
isNullGpu = toml::find_or<bool>(gpu, "nullGpu", isNullGpu);
shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", shouldCopyGPUBuffers);
readbacksEnabled = toml::find_or<bool>(gpu, "readbacks", readbacksEnabled);
directMemoryAccessEnabled = toml::find_or<bool>(gpu, "directMemoryAccess", directMemoryAccessEnabled);
shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", shouldDumpShaders);
shouldPatchShaders = toml::find_or<bool>(gpu, "patchShaders", shouldPatchShaders);
vblankDivider = toml::find_or<int>(gpu, "vblankDivider", vblankDivider);
@@ -791,6 +801,7 @@ void save(const std::filesystem::path& path) {
data["GPU"]["nullGpu"] = isNullGpu;
data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
data["GPU"]["readbacks"] = readbacksEnabled;
data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled;
data["GPU"]["dumpShaders"] = shouldDumpShaders;
data["GPU"]["patchShaders"] = shouldPatchShaders;
data["GPU"]["vblankDivider"] = vblankDivider;
@@ -890,6 +901,7 @@ void setDefaultValues() {
isNullGpu = false;
shouldCopyGPUBuffers = false;
readbacksEnabled = false;
directMemoryAccessEnabled = false;
shouldDumpShaders = false;
shouldPatchShaders = false;
vblankDivider = 1;
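
Taken together, these hunks thread one new boolean through the config module: a getter/setter pair, a TOML key under [GPU] in load() and save(), a default in setDefaultValues(), and a bump of the expected entry count from 50 to 51. A hedged sketch of the resulting config.toml section (key names and defaults come from this diff; the surrounding file layout is assumed):

[GPU]
nullGpu = false
copyGPUBuffers = false
readbacks = false
directMemoryAccess = false
dumpShaders = false
patchShaders = false
vblankDivider = 1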

View file

@@ -47,6 +47,8 @@ bool copyGPUCmdBuffers();
void setCopyGPUCmdBuffers(bool enable);
bool readbacks();
void setReadbacks(bool enable);
bool directMemoryAccess();
void setDirectMemoryAccess(bool enable);
bool dumpShaders();
void setDumpShaders(bool enable);
u32 vblankDiv();

View file

@@ -0,0 +1,46 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <mutex>
namespace Common {
// Like std::shared_mutex, but readers have priority over writers.
class SharedFirstMutex {
public:
void lock() {
std::unique_lock<std::mutex> lock(mtx);
cv.wait(lock, [this]() { return !writer_active && readers == 0; });
writer_active = true;
}
void unlock() {
std::lock_guard<std::mutex> lock(mtx);
writer_active = false;
cv.notify_all();
}
void lock_shared() {
std::unique_lock<std::mutex> lock(mtx);
cv.wait(lock, [this]() { return !writer_active; });
++readers;
}
void unlock_shared() {
std::lock_guard<std::mutex> lock(mtx);
if (--readers == 0) {
cv.notify_all();
}
}
private:
std::mutex mtx;
std::condition_variable cv;
int readers = 0;
bool writer_active = false;
};
} // namespace Common
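
Because SharedFirstMutex exposes the standard lock()/unlock()/lock_shared()/unlock_shared() interface, the usual RAII wrappers work with it unchanged. A minimal usage sketch (illustrative only, not part of the diff):

#include <mutex>        // std::unique_lock
#include <shared_mutex> // std::shared_lock
#include "common/shared_first_mutex.h"

Common::SharedFirstMutex mtx;
int shared_value = 0;

void Reader() {
    std::shared_lock lock(mtx); // calls lock_shared(): blocks only while a writer is active
    (void)shared_value;         // ... read shared state ...
}

void Writer() {
    std::unique_lock lock(mtx); // calls lock(): blocks until no readers and no writer remain
    ++shared_value;             // ... mutate shared state ...
}

The practical difference from std::shared_mutex is that an arriving reader is never held up by merely waiting writers, so frequent short reads (such as the rasterizer's mapped-range lookups below) proceed even under writer pressure, at the cost of possible writer starvation.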

View file

@@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> args
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());

View file

@@ -200,10 +200,18 @@ Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
return BufferAtomicU32<true>(ctx, inst, handle, address, value,

View file

@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -167,6 +168,9 @@ using PointerSize = EmitContext::PointerSize;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
if (!Config::directMemoryAccess()) {
return ctx.EmitFlatbufferLoad(ctx.ConstU32(flatbuf_off_dw));
}
// We can only provide a fallback for immediate offsets.
if (flatbuf_off_dw == 0) {
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);

View file

@@ -91,7 +91,9 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -406,14 +408,20 @@ Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);

View file

@@ -371,19 +371,35 @@ Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
}
Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
}
Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
}
Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
}
Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
}
@@ -395,11 +411,19 @@ Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpINotEqual(ctx.U1[1], lhs, rhs);
}
Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
}
Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
}
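
Note that each 32- and 64-bit pair intentionally shares the same Sirit call: SPIR-V integer comparisons are width-generic, taking operands of any matching integer width and returning a bool, so only the IR layer needs distinct opcodes. A sketch of the emitted SPIR-V for the 64-bit case (assumed disassembly, for illustration only):

%a   = OpLoad %ulong %ptr_a
%b   = OpLoad %ulong %ptr_b
%cmp = OpUGreaterThanEqual %bool %a %b   ; the same opcode serves %uint operands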

View file

@@ -784,19 +784,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_written,
};
void EmitContext::DefineBuffers() {
if (!profile.supports_robust_buffer_access && !info.uses_dma) {
// In case Flatbuf has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
info.buffers.push_back({
.used_types = IR::Type::U32,
// We can't guarantee that flatbuf will not grow past UBO
// limit if there are a lot of ReadConsts. (We could specialize)
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
.buffer_type = BufferType::Flatbuf,
});
// In the future we may want to read buffer sizes from GPU memory if available.
// info.readconst_types |= Info::ReadConstType::Immediate;
}
for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile);
@@ -1219,14 +1206,7 @@ Id EmitContext::DefineReadConst(bool dynamic) {
if (dynamic) {
return u32_zero_value;
} else {
const auto& flatbuf_buffer{buffers[flatbuf_index]};
ASSERT(flatbuf_buffer.binding >= 0 &&
flatbuf_buffer.buffer_type == BufferType::Flatbuf);
const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
flatbuf_buffer.Alias(PointerType::U32);
const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
flatbuf_offset)};
return OpLoad(U32[1], ptr);
return EmitFlatbufferLoad(flatbuf_offset);
}
});

View file

@@ -180,6 +180,16 @@ public:
return OpAccessChain(result_type, shared_mem, index);
}
Id EmitFlatbufferLoad(Id flatbuf_offset) {
const auto& flatbuf_buffer{buffers[flatbuf_index]};
ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
flatbuf_buffer.aliases[u32(PointerType::U32)];
const auto ptr{
OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, flatbuf_offset)};
return OpLoad(U32[1], ptr);
}
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;

View file

@@ -20,7 +20,7 @@ namespace Shader::Gcn {
enum class ConditionOp : u32 {
F,
EQ,
LG,
LG, // NE
GT,
GE,
LT,
@@ -230,7 +230,7 @@ public:
// VOPC
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
void V_CMP_NE_U64(const GcnInst& inst);
void V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
void V_CMP_CLASS_F32(const GcnInst& inst);
// VOP3a

View file

@@ -327,8 +327,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
// V_CMP_{OP8}_U64
case Opcode::V_CMP_EQ_U64:
return V_CMP_U64(ConditionOp::EQ, false, false, inst);
case Opcode::V_CMP_NE_U64:
return V_CMP_NE_U64(inst);
return V_CMP_U64(ConditionOp::LG, false, false, inst);
case Opcode::V_CMP_CLASS_F32:
return V_CMP_CLASS_F32(inst);
@@ -556,27 +558,31 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
if (!is_low) {
// v_mbcnt_hi_u32_b32 v2, -1, 0
// v_mbcnt_hi_u32_b32 vX, -1, 0
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
inst.src[1].field == OperandField::ConstZero) {
return;
}
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0
if (inst.src[0].field == OperandField::ExecHi &&
inst.src[1].field == OperandField::ConstZero) {
return;
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
if ((inst.src[0].field == OperandField::ExecHi ||
inst.src[0].field == OperandField::VccHi) &&
(inst.src[1].field == OperandField::ConstZero ||
inst.src[1].field == OperandField::VectorGPR)) {
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
}
UNREACHABLE();
} else {
// v_mbcnt_lo_u32_b32 v2, -1, vX
// v_mbcnt_lo_u32_b32 vY, -1, vX
// used combined with above to fetch lane id in non-compute stages
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
SetDst(inst.dst[0], ir.LaneId());
return SetDst(inst.dst[0], ir.LaneId());
}
// v_mbcnt_lo_u32_b32 v20, exec_lo, vX
// used combined in above for append buffer indexing.
if (inst.src[0].field == OperandField::ExecLo) {
SetDst(inst.dst[0], ir.Imm32(0));
// v_mbcnt_lo_u32_b32 vY, exec_lo, vX
// used combined with above for append buffer indexing.
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
}
UNREACHABLE();
}
}
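
For context, the lane-id idiom these special cases pattern-match (a typical shader sequence, assumed for illustration):

v_mbcnt_hi_u32_b32 v2, -1, 0    ; v2 = popcount(lane_mask_lt[63:32])
v_mbcnt_lo_u32_b32 v2, -1, v2   ; v2 += popcount(lane_mask_lt[31:0]), i.e. the lane id

The translator collapses the pair into a single ir.LaneId() emitted at the low-half instruction, treating the high-half instruction as a no-op; the new exec_hi/vcc_hi and exec_lo/vcc_lo cases instead forward the src1 addend through SetDst.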
@@ -996,39 +1002,32 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
}
}
void Translator::V_CMP_NE_U64(const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ExecLo:
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
case OperandField::ConstZero:
return ir.Imm1(false);
void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
const IR::U64 src1{GetSrc64(inst.src[1])};
const IR::U1 result = [&] {
switch (op) {
case ConditionOp::EQ:
return ir.IEqual(src0, src1);
case ConditionOp::LG: // NE
return ir.INotEqual(src0, src1);
default:
UNREACHABLE();
UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op));
}
};
const IR::U1 src0{get_src(inst.src[0])};
auto op = [&inst, this](auto x) {
switch (inst.src[1].field) {
case OperandField::ConstZero:
return x;
case OperandField::SignedConstIntNeg:
return ir.LogicalNot(x);
default:
UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field));
}();
if (is_signed) {
UNREACHABLE_MSG("V_CMP_U64 with signed integers is not supported");
}
};
if (set_exec) {
UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported");
}
switch (inst.dst[1].field) {
case OperandField::VccLo:
ir.SetVcc(op(src0));
break;
return ir.SetVcc(result);
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0));
break;
return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
default:
UNREACHABLE();
}
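
As a worked example (operand choice assumed for illustration), a GCN instruction such as

v_cmp_eq_u64 vcc, v[2:3], v[4:5]   ; per-lane: VCC = (64-bit src0 == src1)

now reaches V_CMP_U64 with ConditionOp::EQ and lowers to ir.SetVcc(ir.IEqual(src0, src1)) on 64-bit sources, whereas previously only the NE form had a dedicated handler.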

View file

@@ -74,8 +74,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
case Opcode::BUFFER_ATOMIC_SMIN:
return BUFFER_ATOMIC(AtomicOp::Smin, inst);
case Opcode::BUFFER_ATOMIC_SMIN_X2:
return BUFFER_ATOMIC<IR::U64>(AtomicOp::Smin, inst);
case Opcode::BUFFER_ATOMIC_UMIN:
return BUFFER_ATOMIC(AtomicOp::Umin, inst);
case Opcode::BUFFER_ATOMIC_UMIN_X2:
return BUFFER_ATOMIC<IR::U64>(AtomicOp::Umin, inst);
case Opcode::BUFFER_ATOMIC_SMAX:
return BUFFER_ATOMIC(AtomicOp::Smax, inst);
case Opcode::BUFFER_ATOMIC_SMAX_X2:

View file

@@ -500,8 +500,16 @@ Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, const Value& value,
Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
bool is_signed, BufferInstInfo info) {
switch (value.Type()) {
case Type::U32:
return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
: Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
case Type::U64:
return is_signed ? Inst(Opcode::BufferAtomicSMin64, Flags{info}, handle, address, value)
: Inst(Opcode::BufferAtomicUMin64, Flags{info}, handle, address, value);
default:
ThrowInvalidType(value.Type());
}
}
Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
@@ -1712,12 +1720,32 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
}
}
U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
U1 IREmitter::ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::U32:
return Inst<U1>(is_signed ? Opcode::SLessThanEqual32 : Opcode::ULessThanEqual32, lhs, rhs);
case Type::U64:
return Inst<U1>(is_signed ? Opcode::SLessThanEqual64 : Opcode::ULessThanEqual64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
U1 IREmitter::IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::U32:
return Inst<U1>(is_signed ? Opcode::SGreaterThan32 : Opcode::UGreaterThan32, lhs, rhs);
case Type::U64:
return Inst<U1>(is_signed ? Opcode::SGreaterThan64 : Opcode::UGreaterThan64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
@@ -1734,8 +1762,20 @@ U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
}
}
U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
U1 IREmitter::IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::U32:
return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual32 : Opcode::UGreaterThanEqual32, lhs,
rhs);
case Type::U64:
return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual64 : Opcode::UGreaterThanEqual64, lhs,
rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::LogicalOr(const U1& a, const U1& b) {

View file

@@ -299,10 +299,10 @@ public:
[[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
[[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
[[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
[[nodiscard]] U1 INotEqual(const U32U64& lhs, const U32U64& rhs);
[[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
[[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);

View file

@@ -70,7 +70,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::BufferAtomicIAdd64:
case Opcode::BufferAtomicISub32:
case Opcode::BufferAtomicSMin32:
case Opcode::BufferAtomicSMin64:
case Opcode::BufferAtomicUMin32:
case Opcode::BufferAtomicUMin64:
case Opcode::BufferAtomicFMin32:
case Opcode::BufferAtomicSMax32:
case Opcode::BufferAtomicSMax64:

View file

@@ -124,7 +124,9 @@ OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 )
@@ -382,14 +384,20 @@ OPCODE(ULessThan32, U1, U32, U32, )
OPCODE(ULessThan64, U1, U64, U64, )
OPCODE(IEqual32, U1, U32, U32, )
OPCODE(IEqual64, U1, U64, U64, )
OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, )
OPCODE(SGreaterThan, U1, U32, U32, )
OPCODE(UGreaterThan, U1, U32, U32, )
OPCODE(SLessThanEqual32, U1, U32, U32, )
OPCODE(SLessThanEqual64, U1, U64, U64, )
OPCODE(ULessThanEqual32, U1, U32, U32, )
OPCODE(ULessThanEqual64, U1, U64, U64, )
OPCODE(SGreaterThan32, U1, U32, U32, )
OPCODE(SGreaterThan64, U1, U64, U64, )
OPCODE(UGreaterThan32, U1, U32, U32, )
OPCODE(UGreaterThan64, U1, U64, U64, )
OPCODE(INotEqual32, U1, U32, U32, )
OPCODE(INotEqual64, U1, U64, U64, )
OPCODE(SGreaterThanEqual, U1, U32, U32, )
OPCODE(UGreaterThanEqual, U1, U32, U32, )
OPCODE(SGreaterThanEqual32, U1, U32, U32, )
OPCODE(SGreaterThanEqual64, U1, U64, U64, )
OPCODE(UGreaterThanEqual32, U1, U32, U32, )
OPCODE(UGreaterThanEqual64, U1, U64, U64, )
// Logical operations
OPCODE(LogicalOr, U1, U1, U1, )

View file

@@ -381,24 +381,42 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::ULessThan64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; });
return;
case IR::Opcode::SLessThanEqual:
case IR::Opcode::SLessThanEqual32:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
return;
case IR::Opcode::ULessThanEqual:
case IR::Opcode::SLessThanEqual64:
FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a <= b; });
return;
case IR::Opcode::ULessThanEqual32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
return;
case IR::Opcode::SGreaterThan:
case IR::Opcode::ULessThanEqual64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a <= b; });
return;
case IR::Opcode::SGreaterThan32:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
return;
case IR::Opcode::UGreaterThan:
case IR::Opcode::SGreaterThan64:
FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a > b; });
return;
case IR::Opcode::UGreaterThan32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
return;
case IR::Opcode::SGreaterThanEqual:
case IR::Opcode::UGreaterThan64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a > b; });
return;
case IR::Opcode::SGreaterThanEqual32:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
return;
case IR::Opcode::UGreaterThanEqual:
case IR::Opcode::SGreaterThanEqual64:
FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a >= b; });
return;
case IR::Opcode::UGreaterThanEqual32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
return;
case IR::Opcode::UGreaterThanEqual64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a >= b; });
return;
case IR::Opcode::IEqual32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
return;

View file

@@ -19,7 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void ReadLaneEliminationPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
void LowerBufferFormatToRaw(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);

View file

@@ -20,7 +20,9 @@ bool IsBufferAtomic(const IR::Inst& inst) {
case IR::Opcode::BufferAtomicIAdd64:
case IR::Opcode::BufferAtomicISub32:
case IR::Opcode::BufferAtomicSMin32:
case IR::Opcode::BufferAtomicSMin64:
case IR::Opcode::BufferAtomicUMin32:
case IR::Opcode::BufferAtomicUMin64:
case IR::Opcode::BufferAtomicFMin32:
case IR::Opcode::BufferAtomicSMax32:
case IR::Opcode::BufferAtomicSMax64:
@@ -97,6 +99,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
case IR::Opcode::LoadBufferU64:
case IR::Opcode::StoreBufferU64:
case IR::Opcode::BufferAtomicIAdd64:
case IR::Opcode::BufferAtomicSMax64:
case IR::Opcode::BufferAtomicSMin64:
case IR::Opcode::BufferAtomicUMax64:
case IR::Opcode::BufferAtomicUMin64:
return IR::Type::U64;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32:
@@ -118,6 +124,10 @@ u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
case IR::Opcode::LoadBufferU64:
case IR::Opcode::StoreBufferU64:
case IR::Opcode::BufferAtomicIAdd64:
case IR::Opcode::BufferAtomicSMax64:
case IR::Opcode::BufferAtomicSMin64:
case IR::Opcode::BufferAtomicUMax64:
case IR::Opcode::BufferAtomicUMin64:
return 3;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32: {

View file

@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "shader_recompiler/ir/program.h"
#include "video_core/buffer_cache/buffer_cache.h"
@@ -102,7 +103,9 @@ void Visit(Info& info, const IR::Inst& inst) {
break;
case IR::Opcode::BufferAtomicIAdd64:
case IR::Opcode::BufferAtomicSMax64:
case IR::Opcode::BufferAtomicSMin64:
case IR::Opcode::BufferAtomicUMax64:
case IR::Opcode::BufferAtomicUMin64:
info.uses_buffer_int64_atomics = true;
break;
case IR::Opcode::LaneId:
@@ -136,7 +139,7 @@ void Visit(Info& info, const IR::Inst& inst) {
}
}
void CollectShaderInfoPass(IR::Program& program) {
void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
auto& info = program.info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@@ -144,6 +147,25 @@ void CollectShaderInfoPass(IR::Program& program) {
}
}
// In case Flatbuf has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
if (!profile.supports_robust_buffer_access && !info.uses_dma) {
info.buffers.push_back({
.used_types = IR::Type::U32,
// We can't guarantee that flatbuf will not grow past UBO
// limit if there are a lot of ReadConsts. (We could specialize)
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
.buffer_type = BufferType::Flatbuf,
});
// In the future we may want to read buffer sizes from GPU memory if available.
// info.readconst_types |= Info::ReadConstType::Immediate;
}
if (!Config::directMemoryAccess()) {
info.uses_dma = false;
info.readconst_types = Info::ReadConstType::None;
}
if (info.uses_dma) {
info.buffers.push_back({
.used_types = IR::Type::U64,

View file

@@ -84,7 +84,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::CollectShaderInfoPass(program);
Shader::Optimization::CollectShaderInfoPass(program, profile);
Shader::IR::DumpProgram(program, info);

View file

@@ -471,7 +471,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
uses_dma |= stage->uses_dma;
}
if (uses_dma && !fault_process_pending) {
if (uses_dma) {
// We only use fault buffer for DMA right now.
{
Common::RecursiveSharedLock lock{mapped_ranges_mutex};

View file

@@ -5,6 +5,7 @@
#include <shared_mutex>
#include "common/recursive_lock.h"
#include "common/shared_first_mutex.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -122,7 +123,7 @@ private:
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
boost::icl::interval_set<VAddr> mapped_ranges;
std::shared_mutex mapped_ranges_mutex;
Common::SharedFirstMutex mapped_ranges_mutex;
PipelineCache pipeline_cache;
boost::container::static_vector<