Merge remote-tracking branch 'upstream/main' into config-update

Stephen Miller 2025-07-03 12:05:17 -05:00
commit 0059ffe7a2
25 changed files with 304 additions and 104 deletions

View file

@@ -689,6 +689,7 @@ set(COMMON src/common/logging/backend.cpp
     src/common/recursive_lock.cpp
     src/common/recursive_lock.h
     src/common/sha1.h
+    src/common/shared_first_mutex.h
     src/common/signal_context.h
     src/common/signal_context.cpp
     src/common/singleton.h

View file

@@ -65,6 +65,7 @@ static u32 screenHeight = 720;
 static bool isNullGpu = false;
 static bool shouldCopyGPUBuffers = false;
 static bool readbacksEnabled = false;
+static bool directMemoryAccessEnabled = false;
 static bool shouldDumpShaders = false;
 static bool shouldPatchShaders = false;
 static u32 vblankDivider = 1;
@@ -102,7 +103,7 @@ u32 m_language = 1; // english
 static std::string trophyKey = "";
 
 // Expected number of items in the config file
-static constexpr u64 total_entries = 50;
+static constexpr u64 total_entries = 51;
 
 bool allowHDR() {
     return isHDRAllowed;
@@ -261,6 +262,10 @@ bool readbacks() {
     return readbacksEnabled;
 }
 
+bool directMemoryAccess() {
+    return directMemoryAccessEnabled;
+}
+
 bool dumpShaders() {
     return shouldDumpShaders;
 }
@@ -369,6 +374,10 @@ void setReadbacks(bool enable) {
     readbacksEnabled = enable;
 }
 
+void setDirectMemoryAccess(bool enable) {
+    directMemoryAccessEnabled = enable;
+}
+
 void setDumpShaders(bool enable) {
     shouldDumpShaders = enable;
 }
@@ -622,6 +631,7 @@ void load(const std::filesystem::path& path) {
         isNullGpu = toml::find_or<bool>(gpu, "nullGpu", isNullGpu);
         shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", shouldCopyGPUBuffers);
         readbacksEnabled = toml::find_or<bool>(gpu, "readbacks", readbacksEnabled);
+        directMemoryAccessEnabled = toml::find_or<bool>(gpu, "directMemoryAccess", directMemoryAccessEnabled);
         shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", shouldDumpShaders);
         shouldPatchShaders = toml::find_or<bool>(gpu, "patchShaders", shouldPatchShaders);
         vblankDivider = toml::find_or<int>(gpu, "vblankDivider", vblankDivider);
@@ -791,6 +801,7 @@ void save(const std::filesystem::path& path) {
     data["GPU"]["nullGpu"] = isNullGpu;
     data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
     data["GPU"]["readbacks"] = readbacksEnabled;
+    data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled;
     data["GPU"]["dumpShaders"] = shouldDumpShaders;
     data["GPU"]["patchShaders"] = shouldPatchShaders;
     data["GPU"]["vblankDivider"] = vblankDivider;
@@ -890,6 +901,7 @@ void setDefaultValues() {
     isNullGpu = false;
     shouldCopyGPUBuffers = false;
     readbacksEnabled = false;
+    directMemoryAccessEnabled = false;
     shouldDumpShaders = false;
     shouldPatchShaders = false;
     vblankDivider = 1;

View file

@@ -47,6 +47,8 @@ bool copyGPUCmdBuffers();
 void setCopyGPUCmdBuffers(bool enable);
 bool readbacks();
 void setReadbacks(bool enable);
+bool directMemoryAccess();
+void setDirectMemoryAccess(bool enable);
 bool dumpShaders();
 void setDumpShaders(bool enable);
 u32 vblankDiv();
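
For orientation, a minimal sketch (not part of the commit) of how a frontend might flip and persist the new toggle through the accessors declared above, assuming load() and save() are exposed alongside the getters as in the config.cpp hunks; the config path is a placeholder. The value round-trips through the directMemoryAccess key of the [GPU] section in the TOML file.

#include <filesystem>
#include "common/config.h"

// Hypothetical helper: enable DMA emulation and write the setting back to disk.
void EnableDirectMemoryAccess(const std::filesystem::path& config_path) {
    Config::load(config_path);            // pick up the existing settings first
    Config::setDirectMemoryAccess(true);  // persisted as directMemoryAccess = true under [GPU]
    Config::save(config_path);
}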

View file

@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+namespace Common {
+
+// Like std::shared_mutex, but reader has priority over writer.
+class SharedFirstMutex {
+public:
+    void lock() {
+        std::unique_lock<std::mutex> lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active && readers == 0; });
+        writer_active = true;
+    }
+
+    void unlock() {
+        std::lock_guard<std::mutex> lock(mtx);
+        writer_active = false;
+        cv.notify_all();
+    }
+
+    void lock_shared() {
+        std::unique_lock<std::mutex> lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active; });
+        ++readers;
+    }
+
+    void unlock_shared() {
+        std::lock_guard<std::mutex> lock(mtx);
+        if (--readers == 0) {
+            cv.notify_all();
+        }
+    }
+
+private:
+    std::mutex mtx;
+    std::condition_variable cv;
+    int readers = 0;
+    bool writer_active = false;
+};
+
+} // namespace Common
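
For context, a minimal usage sketch of the new reader-priority mutex (not part of the diff; the guarded vector and the two thread functions below are hypothetical). Readers call lock_shared() and can keep acquiring even while a writer is waiting, because lock() only proceeds once no reader or writer is active.

#include <vector>
#include "common/shared_first_mutex.h"

static Common::SharedFirstMutex ranges_mutex;
static std::vector<int> ranges; // hypothetical shared state

void Reader() {
    ranges_mutex.lock_shared();   // many readers may hold this at once
    const auto count = ranges.size();
    ranges_mutex.unlock_shared();
    (void)count;
}

void Writer() {
    ranges_mutex.lock();          // exclusive; waits until all readers drain
    ranges.push_back(42);
    ranges_mutex.unlock();
}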

View file

@@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
     LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
     LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
     LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
+    LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
     LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
     LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
     LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());

View file

@@ -200,10 +200,18 @@ Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
 }
 
+Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
+}
+
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
 }
 
+Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
+}
+
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
         return BufferAtomicU32<true>(ctx, inst, handle, address, value,

View file

@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "common/assert.h"
+#include "common/config.h"
 #include "common/logging/log.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -167,6 +168,9 @@ using PointerSize = EmitContext::PointerSize;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
+    if (!Config::directMemoryAccess()) {
+        return ctx.EmitFlatbufferLoad(ctx.ConstU32(flatbuf_off_dw));
+    }
     // We can only provide a fallback for immediate offsets.
     if (flatbuf_off_dw == 0) {
         return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);

View file

@@ -91,7 +91,9 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -406,14 +408,20 @@ Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);

View file

@@ -371,19 +371,35 @@ Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
 }
@@ -395,11 +411,19 @@ Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpINotEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
 }

View file

@@ -784,19 +784,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };
 
 void EmitContext::DefineBuffers() {
-    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
-        // In case Flatbuf has not already been bound by IR and is needed
-        // to query buffer sizes, bind it now.
-        info.buffers.push_back({
-            .used_types = IR::Type::U32,
-            // We can't guarantee that flatbuf will not grow past UBO
-            // limit if there are a lot of ReadConsts. (We could specialize)
-            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
-            .buffer_type = BufferType::Flatbuf,
-        });
-        // In the future we may want to read buffer sizes from GPU memory if available.
-        // info.readconst_types |= Info::ReadConstType::Immediate;
-    }
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
@@ -1219,14 +1206,7 @@ Id EmitContext::DefineReadConst(bool dynamic) {
         if (dynamic) {
             return u32_zero_value;
         } else {
-            const auto& flatbuf_buffer{buffers[flatbuf_index]};
-            ASSERT(flatbuf_buffer.binding >= 0 &&
-                   flatbuf_buffer.buffer_type == BufferType::Flatbuf);
-            const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
-                flatbuf_buffer.Alias(PointerType::U32);
-            const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
-                                         flatbuf_offset)};
-            return OpLoad(U32[1], ptr);
+            return EmitFlatbufferLoad(flatbuf_offset);
         }
     });

View file

@@ -180,6 +180,16 @@ public:
         return OpAccessChain(result_type, shared_mem, index);
     }
 
+    Id EmitFlatbufferLoad(Id flatbuf_offset) {
+        const auto& flatbuf_buffer{buffers[flatbuf_index]};
+        ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
+        const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
+            flatbuf_buffer.aliases[u32(PointerType::U32)];
+        const auto ptr{
+            OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, flatbuf_offset)};
+        return OpLoad(U32[1], ptr);
+    }
+
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;

View file

@@ -20,7 +20,7 @@ namespace Shader::Gcn {
 enum class ConditionOp : u32 {
     F,
     EQ,
-    LG,
+    LG, // NE
     GT,
     GE,
     LT,
@@ -230,7 +230,7 @@
     // VOPC
     void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
     void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
-    void V_CMP_NE_U64(const GcnInst& inst);
+    void V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
     void V_CMP_CLASS_F32(const GcnInst& inst);
 
     // VOP3a

View file

@@ -327,8 +327,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_CMP_U32(ConditionOp::TRU, false, true, inst);
 
         // V_CMP_{OP8}_U64
+    case Opcode::V_CMP_EQ_U64:
+        return V_CMP_U64(ConditionOp::EQ, false, false, inst);
     case Opcode::V_CMP_NE_U64:
-        return V_CMP_NE_U64(inst);
+        return V_CMP_U64(ConditionOp::LG, false, false, inst);
 
     case Opcode::V_CMP_CLASS_F32:
         return V_CMP_CLASS_F32(inst);
@@ -556,27 +558,31 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
 
 void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
     if (!is_low) {
-        // v_mbcnt_hi_u32_b32 v2, -1, 0
+        // v_mbcnt_hi_u32_b32 vX, -1, 0
         if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
             inst.src[1].field == OperandField::ConstZero) {
             return;
         }
-        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0
-        if (inst.src[0].field == OperandField::ExecHi &&
-            inst.src[1].field == OperandField::ConstZero) {
-            return;
+        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
+        if ((inst.src[0].field == OperandField::ExecHi ||
+             inst.src[0].field == OperandField::VccHi) &&
+            (inst.src[1].field == OperandField::ConstZero ||
+             inst.src[1].field == OperandField::VectorGPR)) {
+            return SetDst(inst.dst[0], GetSrc(inst.src[1]));
         }
+        UNREACHABLE();
     } else {
-        // v_mbcnt_lo_u32_b32 v2, -1, vX
+        // v_mbcnt_lo_u32_b32 vY, -1, vX
         // used combined with above to fetch lane id in non-compute stages
         if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
-            SetDst(inst.dst[0], ir.LaneId());
+            return SetDst(inst.dst[0], ir.LaneId());
         }
-        // v_mbcnt_lo_u32_b32 v20, exec_lo, vX
-        // used combined in above for append buffer indexing.
-        if (inst.src[0].field == OperandField::ExecLo) {
-            SetDst(inst.dst[0], ir.Imm32(0));
+        // v_mbcnt_lo_u32_b32 vY, exec_lo, vX
+        // used combined with above for append buffer indexing.
+        if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
+            return SetDst(inst.dst[0], GetSrc(inst.src[1]));
         }
+        UNREACHABLE();
     }
 }
 
@@ -996,39 +1002,32 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const
     }
 }
 
-void Translator::V_CMP_NE_U64(const GcnInst& inst) {
-    const auto get_src = [&](const InstOperand& operand) {
-        switch (operand.field) {
-        case OperandField::VccLo:
-            return ir.GetVcc();
-        case OperandField::ExecLo:
-            return ir.GetExec();
-        case OperandField::ScalarGPR:
-            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
-        case OperandField::ConstZero:
-            return ir.Imm1(false);
-        default:
-            UNREACHABLE();
-        }
-    };
-    const IR::U1 src0{get_src(inst.src[0])};
-    auto op = [&inst, this](auto x) {
-        switch (inst.src[1].field) {
-        case OperandField::ConstZero:
-            return x;
-        case OperandField::SignedConstIntNeg:
-            return ir.LogicalNot(x);
-        default:
-            UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field));
-        }
-    };
+void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
+    const IR::U64 src0{GetSrc64(inst.src[0])};
+    const IR::U64 src1{GetSrc64(inst.src[1])};
+    const IR::U1 result = [&] {
+        switch (op) {
+        case ConditionOp::EQ:
+            return ir.IEqual(src0, src1);
+        case ConditionOp::LG: // NE
+            return ir.INotEqual(src0, src1);
+        default:
+            UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op));
+        }
+    }();
+
+    if (is_signed) {
+        UNREACHABLE_MSG("V_CMP_U64 with signed integers is not supported");
+    }
+    if (set_exec) {
+        UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported");
+    }
+
     switch (inst.dst[1].field) {
     case OperandField::VccLo:
-        ir.SetVcc(op(src0));
-        break;
+        return ir.SetVcc(result);
     case OperandField::ScalarGPR:
-        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0));
-        break;
+        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
     default:
         UNREACHABLE();
     }

View file

@@ -74,8 +74,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
     case Opcode::BUFFER_ATOMIC_SMIN:
         return BUFFER_ATOMIC(AtomicOp::Smin, inst);
+    case Opcode::BUFFER_ATOMIC_SMIN_X2:
+        return BUFFER_ATOMIC<IR::U64>(AtomicOp::Smin, inst);
     case Opcode::BUFFER_ATOMIC_UMIN:
         return BUFFER_ATOMIC(AtomicOp::Umin, inst);
+    case Opcode::BUFFER_ATOMIC_UMIN_X2:
+        return BUFFER_ATOMIC<IR::U64>(AtomicOp::Umin, inst);
     case Opcode::BUFFER_ATOMIC_SMAX:
         return BUFFER_ATOMIC(AtomicOp::Smax, inst);
     case Opcode::BUFFER_ATOMIC_SMAX_X2:

View file

@@ -500,8 +500,16 @@ Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, con
 Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
                                   bool is_signed, BufferInstInfo info) {
+    switch (value.Type()) {
+    case Type::U32:
         return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
                          : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
+    case Type::U64:
+        return is_signed ? Inst(Opcode::BufferAtomicSMin64, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMin64, Flags{info}, handle, address, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
@@ -1712,12 +1720,32 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
     }
 }
 
-U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+U1 IREmitter::ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SLessThanEqual32 : Opcode::ULessThanEqual32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SLessThanEqual64 : Opcode::ULessThanEqual64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
-U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+U1 IREmitter::IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThan32 : Opcode::UGreaterThan32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThan64 : Opcode::UGreaterThan64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
@@ -1734,8 +1762,20 @@ U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
     }
 }
 
-U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+U1 IREmitter::IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual32 : Opcode::UGreaterThanEqual32, lhs,
+                        rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual64 : Opcode::UGreaterThanEqual64, lhs,
+                        rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {

View file

@@ -299,10 +299,10 @@ public:
     [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
-    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
-    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 INotEqual(const U32U64& lhs, const U32U64& rhs);
-    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);

View file

@@ -70,7 +70,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicIAdd64:
     case Opcode::BufferAtomicISub32:
    case Opcode::BufferAtomicSMin32:
+    case Opcode::BufferAtomicSMin64:
     case Opcode::BufferAtomicUMin32:
+    case Opcode::BufferAtomicUMin64:
     case Opcode::BufferAtomicFMin32:
     case Opcode::BufferAtomicSMax32:
     case Opcode::BufferAtomicSMax64:

View file

@@ -124,7 +124,9 @@ OPCODE(BufferAtomicIAdd32, U32, Opaq
 OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicSMin64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicUMin64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 )
@@ -382,14 +384,20 @@ OPCODE(ULessThan32, U1, U32,
 OPCODE(ULessThan64, U1, U64, U64, )
 OPCODE(IEqual32, U1, U32, U32, )
 OPCODE(IEqual64, U1, U64, U64, )
-OPCODE(SLessThanEqual, U1, U32, U32, )
-OPCODE(ULessThanEqual, U1, U32, U32, )
-OPCODE(SGreaterThan, U1, U32, U32, )
-OPCODE(UGreaterThan, U1, U32, U32, )
+OPCODE(SLessThanEqual32, U1, U32, U32, )
+OPCODE(SLessThanEqual64, U1, U64, U64, )
+OPCODE(ULessThanEqual32, U1, U32, U32, )
+OPCODE(ULessThanEqual64, U1, U64, U64, )
+OPCODE(SGreaterThan32, U1, U32, U32, )
+OPCODE(SGreaterThan64, U1, U64, U64, )
+OPCODE(UGreaterThan32, U1, U32, U32, )
+OPCODE(UGreaterThan64, U1, U64, U64, )
 OPCODE(INotEqual32, U1, U32, U32, )
 OPCODE(INotEqual64, U1, U64, U64, )
-OPCODE(SGreaterThanEqual, U1, U32, U32, )
-OPCODE(UGreaterThanEqual, U1, U32, U32, )
+OPCODE(SGreaterThanEqual32, U1, U32, U32, )
+OPCODE(SGreaterThanEqual64, U1, U64, U64, )
+OPCODE(UGreaterThanEqual32, U1, U32, U32, )
+OPCODE(UGreaterThanEqual64, U1, U64, U64, )
 
 // Logical operations
 OPCODE(LogicalOr, U1, U1, U1, )

View file

@@ -381,24 +381,42 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::ULessThan64:
         FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; });
         return;
-    case IR::Opcode::SLessThanEqual:
+    case IR::Opcode::SLessThanEqual32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
         return;
-    case IR::Opcode::ULessThanEqual:
+    case IR::Opcode::SLessThanEqual64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a <= b; });
+        return;
+    case IR::Opcode::ULessThanEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
         return;
-    case IR::Opcode::SGreaterThan:
+    case IR::Opcode::ULessThanEqual64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a <= b; });
+        return;
+    case IR::Opcode::SGreaterThan32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
         return;
-    case IR::Opcode::UGreaterThan:
+    case IR::Opcode::SGreaterThan64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a > b; });
+        return;
+    case IR::Opcode::UGreaterThan32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
         return;
-    case IR::Opcode::SGreaterThanEqual:
+    case IR::Opcode::UGreaterThan64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a > b; });
+        return;
+    case IR::Opcode::SGreaterThanEqual32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
         return;
-    case IR::Opcode::UGreaterThanEqual:
+    case IR::Opcode::SGreaterThanEqual64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a >= b; });
+        return;
+    case IR::Opcode::UGreaterThanEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
         return;
+    case IR::Opcode::UGreaterThanEqual64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a >= b; });
+        return;
     case IR::Opcode::IEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
         return;

View file

@@ -19,7 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program);
 void FlattenExtendedUserdataPass(IR::Program& program);
 void ReadLaneEliminationPass(IR::Program& program);
 void ResourceTrackingPass(IR::Program& program);
-void CollectShaderInfoPass(IR::Program& program);
+void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
 void LowerBufferFormatToRaw(IR::Program& program);
 void LowerFp64ToFp32(IR::Program& program);
 void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);

View file

@@ -20,7 +20,9 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicISub32:
     case IR::Opcode::BufferAtomicSMin32:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMin32:
+    case IR::Opcode::BufferAtomicUMin64:
     case IR::Opcode::BufferAtomicFMin32:
     case IR::Opcode::BufferAtomicSMax32:
     case IR::Opcode::BufferAtomicSMax64:
@@ -97,6 +99,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     case IR::Opcode::LoadBufferU64:
     case IR::Opcode::StoreBufferU64:
     case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
+    case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         return IR::Type::U64;
     case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::StoreBufferFormatF32:
@@ -118,6 +124,10 @@ u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
     case IR::Opcode::LoadBufferU64:
     case IR::Opcode::StoreBufferU64:
     case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
+    case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         return 3;
     case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::StoreBufferFormatF32: {

View file

@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/config.h"
 #include "shader_recompiler/ir/program.h"
 #include "video_core/buffer_cache/buffer_cache.h"
@@ -102,7 +103,9 @@ void Visit(Info& info, const IR::Inst& inst) {
         break;
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         info.uses_buffer_int64_atomics = true;
         break;
     case IR::Opcode::LaneId:
@@ -136,7 +139,7 @@ void Visit(Info& info, const IR::Inst& inst) {
     }
 }
 
-void CollectShaderInfoPass(IR::Program& program) {
+void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
     auto& info = program.info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -144,6 +147,25 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
         }
     }
 
+    // In case Flatbuf has not already been bound by IR and is needed
+    // to query buffer sizes, bind it now.
+    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            // We can't guarantee that flatbuf will not grow past UBO
+            // limit if there are a lot of ReadConsts. (We could specialize)
+            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
+            .buffer_type = BufferType::Flatbuf,
+        });
+        // In the future we may want to read buffer sizes from GPU memory if available.
+        // info.readconst_types |= Info::ReadConstType::Immediate;
+    }
+
+    if (!Config::directMemoryAccess()) {
+        info.uses_dma = false;
+        info.readconst_types = Info::ReadConstType::None;
+    }
+
     if (info.uses_dma) {
         info.buffers.push_back({
             .used_types = IR::Type::U64,

View file

@@ -84,7 +84,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
-    Shader::Optimization::CollectShaderInfoPass(program);
+    Shader::Optimization::CollectShaderInfoPass(program, profile);
 
     Shader::IR::DumpProgram(program, info);

View file

@@ -471,7 +471,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         uses_dma |= stage->uses_dma;
     }
 
-    if (uses_dma && !fault_process_pending) {
+    if (uses_dma) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};

View file

@@ -5,6 +5,7 @@
 #include <shared_mutex>
 #include "common/recursive_lock.h"
+#include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -122,7 +123,7 @@ private:
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     boost::icl::interval_set<VAddr> mapped_ranges;
-    std::shared_mutex mapped_ranges_mutex;
+    Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;
    boost::container::static_vector<