Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-05-25 12:55:00 +00:00
shader_recompiler: Remove special case buffers and add support for aliasing (#2428)
* shader_recompiler: Move shared mem lowering into emitter
  * IR can be quite verbose during the first stages of translation, before the SSA and constant-propagation passes that drastically simplify it have run. This lowering can also be done during emission, so doing it there saves some compilation time.
* runtime_info: Pack PsColorBuffer into 8 bytes
  * Drops the size of the total structure by half, from 396 to 204 bytes. It should also make comparison of the array a bit faster, since it is a hot path hit on every draw.
* emit_spirv_context: Add infrastructure for buffer aliases
  * Splits out the buffer creation function so it can be reused when defining multiple type aliases.
* shader_recompiler: Merge srt_flatbuf into the buffers list
  * It is no longer a special case.
* shader_recompiler: Complete buffer aliasing support
  * Adds many more types to buffers, such as F32 for float reads/writes and 8/16-bit integer types for formatted buffers.
* shader_recompiler: Remove existing shared memory emulation
  * The current implementation relies on backend-side handling and hooks into every shared memory access. It also does not handle atomics. It will be replaced by an IR pass that solves these issues.
* shader_recompiler: Reintroduce shared memory on SSBO emulation
  * Now performed with an IR pass and, combined with the previous cleanup, fully transparent to the backend, other than requiring workgroup_index to be provided as an attribute (computing it on every shared memory access would be too verbose). A sketch of the assumed addressing scheme follows the change summary below.
* clang format
* buffer_cache: Reduce buffer sizes
* vk_rasterizer: Clean up resource binding code
  * Reduces noise in the functions and removes some arguments that are already class members.
* Fix gcc
Parent: 290e127a4f
Commit: 82cacec8eb
36 changed files with 675 additions and 625 deletions
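The shared-memory-to-SSBO pass itself (shared_memory_to_storage_pass.cpp) only appears in the CMake hunk below; its body is not part of this diff. As a rough sketch of the idea referenced in the summary, assuming each workgroup addresses its own slice of the backing "ssbo_shmem" buffer and that the IR emitter exposes helpers roughly like the ones used elsewhere in this commit (Imm32, IAdd, IMul, and an attribute getter for WorkgroupIndex), the rewritten address computation could look like the following. The helper name and exact signatures are assumptions, not the pass's actual code:

// Sketch only: maps a shared-memory byte address to an offset inside the emulation SSBO,
// giving every workgroup in the dispatch its own shared_memory_size-sized slice.
IR::U32 EmulatedSharedAddress(IR::IREmitter& ir, const IR::U32& shared_addr,
                              u32 shared_memory_size) {
    const IR::U32 slice_base = ir.IMul(ir.GetAttributeU32(IR::Attribute::WorkgroupIndex),
                                       ir.Imm32(shared_memory_size));
    return ir.IAdd(slice_base, shared_addr);
}

EmitPrologue (changed in one of the hunks below) now defines workgroup_index once at shader start precisely so that a per-access computation like this stays cheap.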
@@ -766,11 +766,11 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/ir/passes/identity_removal_pass.cpp
     src/shader_recompiler/ir/passes/ir_passes.h
     src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
-    src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
     src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
     src/shader_recompiler/ir/passes/ring_access_elimination.cpp
     src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
     src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
+    src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
     src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
     src/shader_recompiler/ir/abstract_syntax_list.h
     src/shader_recompiler/ir/attribute.cpp
@@ -242,14 +242,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     ctx.AddCapability(spv::Capability::Image1D);
     ctx.AddCapability(spv::Capability::Sampled1D);
     ctx.AddCapability(spv::Capability::ImageQuery);
+    ctx.AddCapability(spv::Capability::Int8);
+    ctx.AddCapability(spv::Capability::Int16);
+    ctx.AddCapability(spv::Capability::Int64);
+    ctx.AddCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
+    ctx.AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
     if (info.uses_fp16) {
         ctx.AddCapability(spv::Capability::Float16);
-        ctx.AddCapability(spv::Capability::Int16);
     }
     if (info.uses_fp64) {
         ctx.AddCapability(spv::Capability::Float64);
     }
-    ctx.AddCapability(spv::Capability::Int64);
     if (info.has_storage_images) {
         ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
         ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
@@ -23,10 +23,13 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
 
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
-    auto& buffer = ctx.buffers[handle];
+    const auto& buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
 }
@@ -165,17 +168,17 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
 }
 
 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
-    auto& buffer = ctx.buffers[binding];
-    const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value,
-                                     ctx.ConstU32(gds_addr));
+    const auto& buffer = ctx.buffers[binding];
+    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
 }
 
 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
-    auto& buffer = ctx.buffers[binding];
-    const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value,
-                                     ctx.ConstU32(gds_addr));
+    const auto& buffer = ctx.buffers[binding];
+    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
 }
@@ -160,21 +160,25 @@ void EmitGetGotoVariable(EmitContext&) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
+using BufferAlias = EmitContext::BufferAlias;
+
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
-    u32 flatbuf_off_dw = inst->Flags<u32>();
-    ASSERT(ctx.srt_flatbuf.binding >= 0);
-    ASSERT(flatbuf_off_dw > 0);
-    Id index = ctx.ConstU32(flatbuf_off_dw);
-    auto& buffer = ctx.srt_flatbuf;
-    const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
+    const u32 flatbuf_off_dw = inst->Flags<u32>();
+    const auto& srt_flatbuf = ctx.buffers.back();
+    ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
+           srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
+    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+    const Id ptr{
+        ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
     return ctx.OpLoad(ctx.U32[1], ptr);
 }
 
 Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
-    auto& buffer = ctx.buffers[handle];
+    const auto& buffer = ctx.buffers[handle];
     index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-    const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
-    return ctx.OpLoad(buffer.data_types->Get(1), ptr);
+    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    return ctx.OpLoad(ctx.U32[1], ptr);
 }
 
 Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
@@ -184,7 +188,7 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
                                rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
 }
 
-Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
+static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
     if (IR::IsPosition(attr)) {
         ASSERT(attr == IR::Attribute::Position0);
         const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
@@ -285,6 +289,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
         return EmitReadStepRate(ctx, 0);
     case IR::Attribute::InstanceId1:
         return EmitReadStepRate(ctx, 1);
+    case IR::Attribute::WorkgroupIndex:
+        return ctx.workgroup_index_id;
     case IR::Attribute::WorkgroupId:
         return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
     case IR::Attribute::LocalInvocationId:
@@ -396,140 +402,158 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
     ctx.OpStore(pointer, value);
 }
 
-template <u32 N>
-static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
-    auto& buffer = ctx.buffers[handle];
-    address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+template <u32 N, BufferAlias alias>
+static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto [id, pointer_type] = spv_buffer[alias];
     if constexpr (N == 1) {
-        const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(buffer.data_types->Get(1), ptr);
+        const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+        return ctx.OpLoad(data_types[1], ptr);
     } else {
         boost::container::static_vector<Id, N> ids;
         for (u32 i = 0; i < N; i++) {
             const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr{
-                ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i)};
-            ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
+            const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i)};
+            ids.push_back(ctx.OpLoad(data_types[1], ptr));
         }
-        return ctx.OpCompositeConstruct(buffer.data_types->Get(N), ids);
+        return ctx.OpCompositeConstruct(data_types[N], ids);
     }
 }
 
 Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(8u));
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr));
 }
 
 Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(16u));
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr));
 }
 
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<1>(ctx, handle, address);
+    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<2>(ctx, handle, address);
+    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<3>(ctx, handle, address);
+    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<4>(ctx, handle, address);
+    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[1], EmitLoadBufferU32(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[2], EmitLoadBufferU32x2(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[3], EmitLoadBufferU32x3(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[4], EmitLoadBufferU32x4(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     UNREACHABLE_MSG("SPIR-V instruction");
 }
 
-template <u32 N>
-static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
-    auto& buffer = ctx.buffers[handle];
-    address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+template <u32 N, BufferAlias alias>
+static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto [id, pointer_type] = spv_buffer[alias];
     if constexpr (N == 1) {
-        const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
+        const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
         ctx.OpStore(ptr, value);
     } else {
         for (u32 i = 0; i < N; i++) {
             const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr =
-                ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i);
-            ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i));
+            const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+            ctx.OpStore(ptr, ctx.OpCompositeExtract(data_types[1], value, i));
         }
     }
 }
 
 void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(8u))};
-    EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    ctx.OpStore(ptr, ctx.OpUConvert(ctx.U8, value));
 }
 
 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(16u))};
-    EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    ctx.OpStore(ptr, ctx.OpUConvert(ctx.U16, value));
 }
 
 void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<1>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<2>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<3>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<4>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[1], value));
+    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32x2(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[2], value));
+    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32x3(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[3], value));
+    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32x4(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[4], value));
+    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
@@ -9,65 +9,35 @@ namespace Shader::Backend::SPIRV {
 Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.ConstU32(2U)};
     const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    if (ctx.info.has_emulated_shared_memory) {
-        const Id pointer =
-            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
-        return ctx.OpLoad(ctx.U32[1], pointer);
-    } else {
     const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
     return ctx.OpLoad(ctx.U32[1], pointer);
 }
-}
 
 Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.ConstU32(2U)};
     const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
     const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
-    if (ctx.info.has_emulated_shared_memory) {
-        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
-                                               ctx.u32_zero_value, base_index)};
-        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
-                                               ctx.u32_zero_value, next_index)};
-        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
-                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
-    } else {
     const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
     const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
     return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
                                     ctx.OpLoad(ctx.U32[1], rhs_pointer));
 }
-}
 
 void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
     const Id shift{ctx.ConstU32(2U)};
     const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
-    if (ctx.info.has_emulated_shared_memory) {
-        const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
-                                             ctx.u32_zero_value, word_offset);
-        ctx.OpStore(pointer, value);
-    } else {
     const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
     ctx.OpStore(pointer, value);
 }
-}
 
 void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
     const Id shift{ctx.ConstU32(2U)};
     const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
     const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
-    if (ctx.info.has_emulated_shared_memory) {
-        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
-                                               ctx.u32_zero_value, word_offset)};
-        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
-                                               ctx.u32_zero_value, next_offset)};
-        ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
-        ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
-    } else {
     const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
     const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
     ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
     ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
 }
-}
 
 } // namespace Shader::Backend::SPIRV
@@ -11,6 +11,9 @@ void EmitPrologue(EmitContext& ctx) {
     if (ctx.stage == Stage::Fragment) {
        ctx.DefineInterpolatedAttribs();
     }
+    if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
+        ctx.DefineWorkgroupIndex();
+    }
     ctx.DefineBufferOffsets();
 }
 
@@ -5,7 +5,6 @@
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
-#include "shader_recompiler/ir/passes/srt.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/types.h"
 
@@ -107,6 +106,8 @@ Id EmitContext::Def(const IR::Value& value) {
 void EmitContext::DefineArithmeticTypes() {
     void_id = Name(TypeVoid(), "void_id");
     U1[1] = Name(TypeBool(), "bool_id");
+    U8 = Name(TypeUInt(8), "u8_id");
+    U16 = Name(TypeUInt(16), "u16_id");
     if (info.uses_fp16) {
         F16[1] = Name(TypeFloat(16), "f16_id");
         U16 = Name(TypeUInt(16), "u16_id");
@@ -193,6 +194,9 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 
 void EmitContext::DefineBufferOffsets() {
     for (BufferDefinition& buffer : buffers) {
+        if (buffer.buffer_type != BufferType::Guest) {
+            continue;
+        }
         const u32 binding = buffer.binding;
         const u32 half = PushData::BufOffsetIndex + (binding >> 4);
         const u32 comp = (binding & 0xf) >> 2;
@@ -211,8 +215,7 @@ void EmitContext::DefineInterpolatedAttribs() {
     if (!profile.needs_manual_interpolation) {
         return;
     }
-    // Iterate all input attributes, load them and manually interpolate with barycentric
-    // coordinates.
+    // Iterate all input attributes, load them and manually interpolate.
     for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
         const auto& input = runtime_info.fs_info.inputs[i];
         const u32 semantic = input.param_index;
@@ -237,6 +240,20 @@ void EmitContext::DefineInterpolatedAttribs() {
     }
 }
 
+void EmitContext::DefineWorkgroupIndex() {
+    const Id workgroup_id_val{OpLoad(U32[3], workgroup_id)};
+    const Id workgroup_x{OpCompositeExtract(U32[1], workgroup_id_val, 0)};
+    const Id workgroup_y{OpCompositeExtract(U32[1], workgroup_id_val, 1)};
+    const Id workgroup_z{OpCompositeExtract(U32[1], workgroup_id_val, 2)};
+    const Id num_workgroups{OpLoad(U32[3], num_workgroups_id)};
+    const Id num_workgroups_x{OpCompositeExtract(U32[1], num_workgroups, 0)};
+    const Id num_workgroups_y{OpCompositeExtract(U32[1], num_workgroups, 1)};
+    workgroup_index_id =
+        OpIAdd(U32[1], OpIAdd(U32[1], workgroup_x, OpIMul(U32[1], workgroup_y, num_workgroups_x)),
+               OpIMul(U32[1], workgroup_z, OpIMul(U32[1], num_workgroups_x, num_workgroups_y)));
+    Name(workgroup_index_id, "workgroup_index");
+}
+
 Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
     switch (default_value) {
     case 0:
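For reference, the expression added above flattens the three-dimensional workgroup id into a single dispatch-wide index: workgroup_index = x + y * num_x + z * num_x * num_y. For example, with num_workgroups = (8, 4, 2), the workgroup with id (3, 2, 1) gets index 3 + 2 * 8 + 1 * 8 * 4 = 51, so each workgroup maps to a unique slot in the dispatch.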
@@ -305,9 +322,16 @@ void EmitContext::DefineInputs() {
         break;
     }
     case LogicalStage::Fragment:
+        if (info.loads.GetAny(IR::Attribute::FragCoord)) {
             frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
+        }
+        if (info.stores.Get(IR::Attribute::Depth)) {
             frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
-        front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
+        }
+        if (info.loads.Get(IR::Attribute::IsFrontFace)) {
+            front_facing =
+                DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
+        }
         if (profile.needs_manual_interpolation) {
             gl_bary_coord_id =
                 DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
@@ -342,9 +366,19 @@ void EmitContext::DefineInputs() {
         }
         break;
     case LogicalStage::Compute:
-        workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
+        if (info.loads.GetAny(IR::Attribute::WorkgroupIndex) ||
+            info.loads.GetAny(IR::Attribute::WorkgroupId)) {
+            workgroup_id =
+                DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
+        }
+        if (info.loads.GetAny(IR::Attribute::WorkgroupIndex)) {
+            num_workgroups_id =
+                DefineVariable(U32[3], spv::BuiltIn::NumWorkgroups, spv::StorageClass::Input);
+        }
+        if (info.loads.GetAny(IR::Attribute::LocalInvocationId)) {
             local_invocation_id =
                 DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
+        }
         break;
     case LogicalStage::Geometry: {
         primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
@@ -588,78 +622,74 @@ void EmitContext::DefinePushDataBlock() {
     interfaces.push_back(push_data_block);
 }
 
-void EmitContext::DefineBuffers() {
-    boost::container::small_vector<Id, 8> type_ids;
-    const auto define_struct = [&](Id record_array_type, bool is_instance_data,
-                                   std::optional<std::string_view> explicit_name = {}) {
+EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_written, u32 elem_shift,
+                                                 BufferType buffer_type, Id data_type) {
+    // Define array type.
+    const Id max_num_items = ConstU32(u32(profile.max_ubo_size) >> elem_shift);
+    const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
+                                          : TypeArray(data_type, max_num_items)};
+    // Define block struct type. Don't perform decorations twice on the same Id.
     const Id struct_type{TypeStruct(record_array_type)};
-    if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
-        return struct_type;
-    }
-    Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
-    auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
-                                 : fmt::format("{}_cbuf_block_f32", stage);
-    name = explicit_name.value_or(name);
-    Name(struct_type, name);
+    if (std::ranges::find(buf_type_ids, record_array_type.value, &Id::value) ==
+        buf_type_ids.end()) {
+        Decorate(record_array_type, spv::Decoration::ArrayStride, 1 << elem_shift);
         Decorate(struct_type, spv::Decoration::Block);
         MemberName(struct_type, 0, "data");
         MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
-    type_ids.push_back(record_array_type);
-    return struct_type;
-    };
-
-    if (info.has_readconst) {
-        const Id data_type = U32[1];
-        const auto storage_class = spv::StorageClass::Uniform;
-        const Id pointer_type = TypePointer(storage_class, data_type);
-        const Id record_array_type{
-            TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
-
-        const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
-
-        const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
-        const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
-        Decorate(id, spv::Decoration::Binding, binding.unified++);
-        Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        Name(id, "srt_flatbuf_ubo");
-
-        srt_flatbuf = {
-            .id = id,
-            .binding = binding.buffer++,
-            .pointer_type = pointer_type,
-        };
-        interfaces.push_back(id);
+        buf_type_ids.push_back(record_array_type);
     }
-
-    for (const auto& desc : info.buffers) {
-        const auto sharp = desc.GetSharp(info);
-        const bool is_storage = desc.IsStorage(sharp, profile);
-        const u32 array_size = profile.max_ubo_size >> 2;
-        const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
-        const Id data_type = (*data_types)[1];
-        const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
-                                              : TypeArray(data_type, ConstU32(array_size))};
-        const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
-
-        const auto storage_class =
-            is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
-        const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
-        const Id pointer_type = TypePointer(storage_class, data_type);
-        const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
-        Decorate(id, spv::Decoration::Binding, binding.unified++);
-        Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        if (is_storage && !desc.is_written) {
-            Decorate(id, spv::Decoration::NonWritable);
-        }
-        Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sharp_idx));
-
-        buffers.push_back({
-            .id = id,
-            .binding = binding.buffer++,
-            .data_types = data_types,
-            .pointer_type = pointer_type,
-        });
-        interfaces.push_back(id);
+    // Define buffer binding interface.
+    const auto storage_class =
+        is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
+    const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
+    const Id pointer_type = TypePointer(storage_class, data_type);
+    const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
+    Decorate(id, spv::Decoration::Binding, binding.unified);
+    Decorate(id, spv::Decoration::DescriptorSet, 0U);
+    if (is_storage && !is_written) {
+        Decorate(id, spv::Decoration::NonWritable);
+    }
+    switch (buffer_type) {
+    case Shader::BufferType::GdsBuffer:
+        Name(id, "gds_buffer");
+        break;
+    case Shader::BufferType::ReadConstUbo:
+        Name(id, "srt_flatbuf_ubo");
+        break;
+    case Shader::BufferType::SharedMemory:
+        Name(id, "ssbo_shmem");
+        break;
+    default:
+        Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer));
+    }
+    interfaces.push_back(id);
+    return {id, pointer_type};
+};
+
+void EmitContext::DefineBuffers() {
+    for (const auto& desc : info.buffers) {
+        const auto buf_sharp = desc.GetSharp(info);
+        const bool is_storage = desc.IsStorage(buf_sharp, profile);
+
+        // Define aliases depending on the shader usage.
+        auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
+        if (True(desc.used_types & IR::Type::U32)) {
+            spv_buffer[BufferAlias::U32] =
+                DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
+        }
+        if (True(desc.used_types & IR::Type::F32)) {
+            spv_buffer[BufferAlias::F32] =
+                DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
+        }
+        if (True(desc.used_types & IR::Type::U16)) {
+            spv_buffer[BufferAlias::U16] =
+                DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
+        }
+        if (True(desc.used_types & IR::Type::U8)) {
+            spv_buffer[BufferAlias::U8] =
+                DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
+        }
+        ++binding.unified;
     }
 }
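Note the binding scheme this sets up: DefineBuffer decorates each alias with the current binding.unified value without incrementing it, and DefineBuffers bumps the counter once per guest buffer, so all typed aliases of one buffer share a single descriptor binding while binding.buffer keeps identifying the underlying buffer. Extending the scheme with another alias would follow the same pattern; the snippet below is purely illustrative and assumes a U64 scalar type and a BufferAlias::U64 enumerator that do not exist in this commit:

// Hypothetical extension, not part of the commit: a 64-bit integer alias would use an
// element shift of 3 (8-byte elements) and reuse the same shared descriptor binding.
if (True(desc.used_types & IR::Type::U64)) {
    spv_buffer[BufferAlias::U64] =
        DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
}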
@@ -809,51 +839,18 @@ void EmitContext::DefineImagesAndSamplers() {
 }
 
 void EmitContext::DefineSharedMemory() {
-    static constexpr size_t DefaultSharedMemSize = 2_KB;
     if (!info.uses_shared) {
         return;
     }
     ASSERT(info.stage == Stage::Compute);
+    const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
-    const u32 max_shared_memory_size = profile.max_shared_memory_size;
-    u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
-    if (shared_memory_size == 0) {
-        shared_memory_size = DefaultSharedMemSize;
-    }
-
     const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
     const Id type{TypeArray(U32[1], ConstU32(num_elements))};
-
-    if (shared_memory_size <= max_shared_memory_size) {
     shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
     shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
     shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
     Name(shared_memory_u32, "shared_mem");
     interfaces.push_back(shared_memory_u32);
-    } else {
-        shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
-        shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
-
-        Decorate(type, spv::Decoration::ArrayStride, 4);
-
-        const Id struct_type{TypeStruct(type)};
-        Name(struct_type, "shared_memory_buf");
-        Decorate(struct_type, spv::Decoration::Block);
-        MemberName(struct_type, 0, "data");
-        MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
-
-        const Id struct_pointer_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
-        const Id ssbo_id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::StorageBuffer)};
-        Decorate(ssbo_id, spv::Decoration::Binding, binding.unified++);
-        Decorate(ssbo_id, spv::Decoration::DescriptorSet, 0U);
-        Name(ssbo_id, "shared_mem_ssbo");
-
-        shared_memory_u32 = ssbo_id;
-
-        info.has_emulated_shared_memory = true;
-        info.shared_memory_size = shared_memory_size;
-        interfaces.push_back(ssbo_id);
-    }
 }
 
 Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
@@ -8,7 +8,7 @@
 
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
-#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/ir/value.h"
 #include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::SPIRV {
@@ -45,6 +45,7 @@ public:
 
     void DefineBufferOffsets();
     void DefineInterpolatedAttribs();
+    void DefineWorkgroupIndex();
 
     [[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
                                  std::optional<spv::BuiltIn> builtin = std::nullopt) {
@@ -200,8 +201,10 @@ public:
     std::array<Id, 30> patches{};
 
     Id workgroup_id{};
+    Id num_workgroups_id{};
+    Id workgroup_index_id{};
     Id local_invocation_id{};
-    Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
+    Id invocation_id{};
     Id subgroup_local_invocation_id{};
     Id image_u32{};
 
@@ -227,18 +230,38 @@ public:
         bool is_storage = false;
     };
 
-    struct BufferDefinition {
+    enum class BufferAlias : u32 {
+        U8,
+        U16,
+        U32,
+        F32,
+        NumAlias,
+    };
+
+    struct BufferSpv {
         Id id;
-        Id offset;
-        Id offset_dwords;
-        u32 binding;
-        const VectorIds* data_types;
         Id pointer_type;
     };
 
+    struct BufferDefinition {
+        u32 binding;
+        BufferType buffer_type;
+        Id offset;
+        Id offset_dwords;
+        std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
+
+        const BufferSpv& operator[](BufferAlias alias) const {
+            return aliases[u32(alias)];
+        }
+
+        BufferSpv& operator[](BufferAlias alias) {
+            return aliases[u32(alias)];
+        }
+    };
+
     Bindings& binding;
+    boost::container::small_vector<Id, 16> buf_type_ids;
     boost::container::small_vector<BufferDefinition, 16> buffers;
-    BufferDefinition srt_flatbuf;
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
 
@@ -279,6 +302,9 @@ private:
     SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
                                     bool output);
 
+    BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type,
+                           Id data_type);
+
     Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
     Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
 };
@@ -176,6 +176,13 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
     const IR::VectorReg data0{inst.src[1].code};
     const IR::VectorReg data1{inst.src[2].code};
+    const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
+    if (info.stage == Stage::Fragment) {
+        ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0,
+                   "Unexpected shared memory offset alignment: {}", offset);
+        ir.SetVectorReg(GetScratchVgpr(offset), ir.GetVectorReg(data0));
+        return;
+    }
     if (is_pair) {
         const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
@@ -195,14 +202,12 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
                        addr1);
         }
     } else if (bit_size == 64) {
-        const IR::U32 addr0 = ir.IAdd(
-            addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
+        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         const IR::Value data =
             ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
         ir.WriteShared(bit_size, data, addr0);
     } else {
-        const IR::U32 addr0 = ir.IAdd(
-            addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
+        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
     }
 }
@@ -223,6 +228,13 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
                          const GcnInst& inst) {
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
     IR::VectorReg dst_reg{inst.dst[0].code};
+    const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
+    if (info.stage == Stage::Fragment) {
+        ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0,
+                   "Unexpected shared memory offset alignment: {}", offset);
+        ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset)));
+        return;
+    }
     if (is_pair) {
         // Pair loads are either 32 or 64-bit
         const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
@ -243,14 +255,12 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
||||||
}
|
}
|
||||||
} else if (bit_size == 64) {
|
} else if (bit_size == 64) {
|
||||||
const IR::U32 addr0 = ir.IAdd(
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
|
||||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
||||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
||||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
||||||
} else {
|
} else {
|
||||||
const IR::U32 addr0 = ir.IAdd(
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
|
||||||
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
|
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
|
||||||
ir.SetVectorReg(dst_reg, data);
|
ir.SetVectorReg(dst_reg, data);
|
||||||
}
|
}
|
||||||
|
|
|
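Note (illustrative, not part of the change): the translator folds the two 8-bit DS offsets into one byte offset and, for fragment shaders, assumes 256-byte-aligned slots — 4 bytes per lane times a 64-lane GCN wave — so each slot can be held in a scratch VGPR. A standalone sketch of that arithmetic, with placeholder names:

    // Standalone sketch; names are placeholders, not shadPS4 API.
    #include <cstdint>

    constexpr std::uint32_t FoldDsOffset(std::uint8_t offset0, std::uint8_t offset1) {
        // Same folding the translator performs once per DS op: offset1 supplies the high byte.
        return (static_cast<std::uint32_t>(offset1) << 8u) + offset0;
    }

    constexpr std::uint32_t ScratchSlot(std::uint32_t offset) {
        // 4 bytes per lane * 64 lanes per wave = 256 bytes between spilled registers.
        return offset / 256u;
    }

    static_assert(FoldDsOffset(0x00, 0x01) == 256u);
    static_assert(ScratchSlot(FoldDsOffset(0x00, 0x02)) == 2u);
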
@@ -7,7 +7,7 @@

 namespace Shader::Gcn {

-u32 SwizzleMrtComponent(const FragmentRuntimeInfo::PsColorBuffer& color_buffer, u32 comp) {
+u32 SwizzleMrtComponent(const PsColorBuffer& color_buffer, u32 comp) {
     const auto [r, g, b, a] = color_buffer.swizzle;
     const std::array swizzle_array = {r, g, b, a};
     const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);

@@ -16,7 +16,7 @@ u32 SwizzleMrtComponent(const FragmentRuntimeInfo::PsColorBuffer& color_buffer,
 }

 void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
-                                const FragmentRuntimeInfo::PsColorBuffer& color_buffer) {
+                                const PsColorBuffer& color_buffer) {
     auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
     if (color_buffer.needs_unorm_fixup) {
         // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.

@@ -4,7 +4,6 @@
 #include "common/config.h"
 #include "common/io_file.h"
 #include "common/path_util.h"
-#include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/info.h"

@@ -21,9 +20,14 @@

 namespace Shader::Gcn {

+static u32 next_vgpr_num;
+static std::unordered_map<u32, IR::VectorReg> vgpr_map;
+
 Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
                        const Profile& profile_)
-    : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {}
+    : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {
+    next_vgpr_num = vgpr_map.empty() ? runtime_info.num_allocated_vgprs : next_vgpr_num;
+}

 void Translator::EmitPrologue() {
     ir.Prologue();

@@ -179,8 +183,21 @@ void Translator::EmitPrologue() {
     default:
         UNREACHABLE_MSG("Unknown shader stage");
     }
+
+    // Clear any scratch vgpr mappings for next shader.
+    vgpr_map.clear();
 }

+IR::VectorReg Translator::GetScratchVgpr(u32 offset) {
+    const auto [it, is_new] = vgpr_map.try_emplace(offset);
+    if (is_new) {
+        ASSERT_MSG(next_vgpr_num < 256, "Out of VGPRs");
+        const auto new_vgpr = static_cast<IR::VectorReg>(next_vgpr_num++);
+        it->second = new_vgpr;
+    }
+    return it->second;
+};
+
 template <typename T>
 T Translator::GetSrc(const InstOperand& operand) {
     constexpr bool is_float = std::is_same_v<T, IR::F32>;

@@ -490,7 +507,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
         info.buffers.push_back({
             .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
             .used_types = IR::Type::F32,
-            .is_instance_data = true,
             .instance_attrib = attrib.semantic,
         });
     }

@@ -309,7 +309,7 @@ private:
                      const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);

     void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
-                        const FragmentRuntimeInfo::PsColorBuffer& color_buffer);
+                        const PsColorBuffer& color_buffer);
     void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
     void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
     void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);

@@ -317,6 +317,8 @@ private:

     void LogMissingOpcode(const GcnInst& inst);

+    IR::VectorReg GetScratchVgpr(u32 offset);
+
 private:
     IR::IREmitter ir;
     Info& info;

@@ -2,7 +2,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #pragma once

-#include <algorithm>
 #include <span>
 #include <vector>
 #include <boost/container/small_vector.hpp>

@@ -19,7 +18,6 @@
 #include "shader_recompiler/params.h"
 #include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
-#include "video_core/amdgpu/liverpool.h"
 #include "video_core/amdgpu/resource.h"

 namespace Shader {

@@ -37,21 +35,30 @@ enum class TextureType : u32 {
 };
 constexpr u32 NUM_TEXTURE_TYPES = 7;

+enum class BufferType : u32 {
+    Guest,
+    ReadConstUbo,
+    GdsBuffer,
+    SharedMemory,
+};
+
 struct Info;

 struct BufferResource {
     u32 sharp_idx;
     IR::Type used_types;
     AmdGpu::Buffer inline_cbuf;
-    bool is_gds_buffer{};
-    bool is_instance_data{};
+    BufferType buffer_type;
     u8 instance_attrib{};
     bool is_written{};
     bool is_formatted{};

-    [[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer,
-                                 const Profile& profile) const noexcept {
-        return buffer.GetSize() > profile.max_ubo_size || is_written || is_gds_buffer;
+    bool IsSpecial() const noexcept {
+        return buffer_type != BufferType::Guest;
+    }
+
+    bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept {
+        return buffer.GetSize() > profile.max_ubo_size || is_written;
     }

     [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;

@@ -193,10 +200,8 @@ struct Info {
     bool uses_unpack_10_11_11{};
     bool stores_tess_level_outer{};
     bool stores_tess_level_inner{};
-    bool translation_failed{}; // indicates that shader has unsupported instructions
-    bool has_emulated_shared_memory{};
+    bool translation_failed{};
     bool has_readconst{};
-    u32 shared_memory_size{};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};

@@ -233,10 +238,8 @@ struct Info {
     }

     void AddBindings(Backend::Bindings& bnd) const {
-        const auto total_buffers =
-            buffers.size() + (has_readconst ? 1 : 0) + (has_emulated_shared_memory ? 1 : 0);
-        bnd.buffer += total_buffers;
-        bnd.unified += total_buffers + images.size() + samplers.size();
+        bnd.buffer += buffers.size();
+        bnd.unified += buffers.size() + images.size() + samplers.size();
         bnd.user_data += ud_mask.NumRegs();
     }

@@ -283,14 +286,3 @@ constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept
 }

 } // namespace Shader
-
-template <>
-struct fmt::formatter<Shader::Stage> {
-    constexpr auto parse(format_parse_context& ctx) {
-        return ctx.begin();
-    }
-    auto format(const Shader::Stage stage, format_context& ctx) const {
-        constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
-        return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
-    }
-};

@@ -69,16 +69,17 @@ enum class Attribute : u64 {
     SampleIndex = 72,
     GlobalInvocationId = 73,
     WorkgroupId = 74,
-    LocalInvocationId = 75,
-    LocalInvocationIndex = 76,
-    FragCoord = 77,
-    InstanceId0 = 78, // step rate 0
-    InstanceId1 = 79, // step rate 1
-    InvocationId = 80, // TCS id in output patch and instanced geometry shader id
-    PatchVertices = 81,
-    TessellationEvaluationPointU = 82,
-    TessellationEvaluationPointV = 83,
-    PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
+    WorkgroupIndex = 75,
+    LocalInvocationId = 76,
+    LocalInvocationIndex = 77,
+    FragCoord = 78,
+    InstanceId0 = 79, // step rate 0
+    InstanceId1 = 80, // step rate 1
+    InvocationId = 81, // TCS id in output patch and instanced geometry shader id
+    PatchVertices = 82,
+    TessellationEvaluationPointU = 83,
+    TessellationEvaluationPointV = 84,
+    PackedHullInvocationInfo = 85, // contains patch id within the VGT and invocation ID
     Max,
 };

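Note (assumption, for orientation only): WorkgroupIndex is the flat counterpart of the 3-D WorkgroupId. This diff does not show how the emitter derives it, but a conventional flattening would look like the sketch below.

    // Hypothetical helper, not engine code.
    #include <array>
    #include <cstdint>

    constexpr std::uint32_t FlattenWorkgroupIndex(std::array<std::uint32_t, 3> id,
                                                  std::array<std::uint32_t, 3> num_groups) {
        return id[0] + num_groups[0] * (id[1] + num_groups[1] * id[2]);
    }

    static_assert(FlattenWorkgroupIndex({1, 2, 0}, {4, 4, 4}) == 9u);
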
@@ -20,12 +20,14 @@ void FlattenExtendedUserdataPass(IR::Program& program);
 void ResourceTrackingPass(IR::Program& program);
 void CollectShaderInfoPass(IR::Program& program);
 void LowerBufferFormatToRaw(IR::Program& program);
-void LowerSharedMemToRegisters(IR::Program& program, const RuntimeInfo& runtime_info);
 void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
                            Stage stage);
 void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
 void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
 void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
-void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile);
+void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_info,
+                             const Profile& profile);
+void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_info,
+                               const Profile& profile);

 } // namespace Shader::Optimization

@@ -1,81 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <unordered_map>
-
-#include "shader_recompiler/ir/ir_emitter.h"
-#include "shader_recompiler/ir/program.h"
-
-namespace Shader::Optimization {
-
-static bool IsSharedMemoryInst(const IR::Inst& inst) {
-    const auto opcode = inst.GetOpcode();
-    return opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64 ||
-           opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64;
-}
-
-static u32 GetSharedMemImmOffset(const IR::Inst& inst) {
-    const auto* address = inst.Arg(0).InstRecursive();
-    ASSERT(address->GetOpcode() == IR::Opcode::IAdd32);
-    const auto ir_offset = address->Arg(1);
-    ASSERT_MSG(ir_offset.IsImmediate());
-    const auto offset = ir_offset.U32();
-    // Typical usage is the compiler spilling registers into shared memory, with 256 bytes between
-    // each register to account for 4 bytes per register times 64 threads per group. Ensure that
-    // this assumption holds, as if it does not this approach may need to be revised.
-    ASSERT_MSG(offset % 256 == 0, "Unexpected shared memory offset alignment: {}", offset);
-    return offset;
-}
-
-static void ConvertSharedMemToVgpr(IR::IREmitter& ir, IR::Inst& inst, const IR::VectorReg vgpr) {
-    switch (inst.GetOpcode()) {
-    case IR::Opcode::LoadSharedU32:
-        inst.ReplaceUsesWithAndRemove(ir.GetVectorReg(vgpr));
-        break;
-    case IR::Opcode::LoadSharedU64:
-        inst.ReplaceUsesWithAndRemove(
-            ir.CompositeConstruct(ir.GetVectorReg(vgpr), ir.GetVectorReg(vgpr + 1)));
-        break;
-    case IR::Opcode::WriteSharedU32:
-        ir.SetVectorReg(vgpr, IR::U32{inst.Arg(1)});
-        inst.Invalidate();
-        break;
-    case IR::Opcode::WriteSharedU64: {
-        const auto value = inst.Arg(1);
-        ir.SetVectorReg(vgpr, IR::U32{ir.CompositeExtract(value, 0)});
-        ir.SetVectorReg(vgpr, IR::U32{ir.CompositeExtract(value, 1)});
-        inst.Invalidate();
-        break;
-    }
-    default:
-        UNREACHABLE_MSG("Unknown shared memory opcode: {}", inst.GetOpcode());
-    }
-}
-
-void LowerSharedMemToRegisters(IR::Program& program, const RuntimeInfo& runtime_info) {
-    u32 next_vgpr_num = runtime_info.num_allocated_vgprs;
-    std::unordered_map<u32, IR::VectorReg> vgpr_map;
-    const auto get_vgpr = [&next_vgpr_num, &vgpr_map](const u32 offset) {
-        const auto [it, is_new] = vgpr_map.try_emplace(offset);
-        if (is_new) {
-            ASSERT_MSG(next_vgpr_num < 256, "Out of VGPRs");
-            const auto new_vgpr = static_cast<IR::VectorReg>(next_vgpr_num++);
-            it->second = new_vgpr;
-        }
-        return it->second;
-    };
-
-    for (IR::Block* const block : program.blocks) {
-        for (IR::Inst& inst : block->Instructions()) {
-            if (!IsSharedMemoryInst(inst)) {
-                continue;
-            }
-            const auto offset = GetSharedMemImmOffset(inst);
-            const auto vgpr = get_vgpr(offset);
-            IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
-            ConvertSharedMemToVgpr(ir, inst, vgpr);
-        }
-    }
-}
-
-} // namespace Shader::Optimization

@@ -78,8 +78,21 @@ bool IsDataRingInstruction(const IR::Inst& inst) {
 }

 IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadBufferU8:
+    case IR::Opcode::StoreBufferU8:
+        return IR::Type::U8;
+    case IR::Opcode::LoadBufferU16:
+    case IR::Opcode::StoreBufferU16:
+        return IR::Type::U16;
+    case IR::Opcode::LoadBufferFormatF32:
+    case IR::Opcode::StoreBufferFormatF32:
+        // Formatted buffer loads can use a variety of types.
+        return IR::Type::U32 | IR::Type::F32 | IR::Type::U16 | IR::Type::U8;
+    default:
         return IR::Type::U32;
     }
+}

 bool IsImageAtomicInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {

@@ -121,11 +134,9 @@ public:

     u32 Add(const BufferResource& desc) {
         const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
-            // Only one GDS binding can exist.
-            if (desc.is_gds_buffer && existing.is_gds_buffer) {
-                return true;
-            }
-            return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf;
+            return desc.sharp_idx == existing.sharp_idx &&
+                   desc.inline_cbuf == existing.inline_cbuf &&
+                   desc.buffer_type == existing.buffer_type;
         })};
         auto& buffer = buffer_resources[index];
         buffer.used_types |= desc.used_types;

@@ -272,6 +283,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
         .sharp_idx = std::numeric_limits<u32>::max(),
         .used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
         .inline_cbuf = cbuf,
+        .buffer_type = BufferType::Guest,
     });
 }

@@ -286,6 +298,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     binding = descriptors.Add(BufferResource{
         .sharp_idx = sharp,
         .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
+        .buffer_type = BufferType::Guest,
         .is_written = IsBufferStore(inst),
         .is_formatted = inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
                         inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,

@@ -402,13 +415,10 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
 }

 void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
-    // Insert gds binding in the shader if it doesn't exist already.
-    // The buffer is used for append/consume counters.
-    constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
     const u32 binding = descriptors.Add(BufferResource{
         .used_types = IR::Type::U32,
-        .inline_cbuf = GdsSharp,
-        .is_gds_buffer = true,
+        .inline_cbuf = AmdGpu::Buffer::Null(),
+        .buffer_type = BufferType::GdsBuffer,
         .is_written = true,
     });

@@ -420,12 +430,12 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
     };

     // Attempt to deduce the GDS address of counter at compile time.
-    const u32 gds_addr = [&] {
+    u32 gds_addr = 0;
     const IR::Value& gds_offset = inst.Arg(0);
     if (gds_offset.IsImmediate()) {
         // Nothing to do, offset is known.
-        return gds_offset.U32() & 0xFFFF;
-    }
+        gds_addr = gds_offset.U32() & 0xFFFF;
+    } else {
         const auto result = IR::BreadthFirstSearch(&inst, pred);
         ASSERT_MSG(result, "Unable to track M0 source");

@@ -436,8 +446,8 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
         if (prod->GetOpcode() == IR::Opcode::IAdd32) {
             m0_val += prod->Arg(1).U32();
         }
-        return m0_val & 0xFFFF;
-    }();
+        gds_addr = m0_val & 0xFFFF;
+    }

     // Patch instruction.
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};

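Note (sketch, assuming IR::Type behaves as a plain bit-flag enum): BufferDataType now reports the element type of each access, and the descriptor merge ORs those flags into used_types for the shared binding, which is what lets the backend later emit several aliased views of one buffer. A stand-in model of that accumulation:

    // Stand-in flag enum mirroring how used_types accumulates per binding.
    #include <cstdint>

    enum class Type : std::uint32_t { U8 = 1u << 0, U16 = 1u << 1, U32 = 1u << 2, F32 = 1u << 3 };

    constexpr Type operator|(Type a, Type b) {
        return static_cast<Type>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
    }

    constexpr bool Uses(Type mask, Type t) {
        return (static_cast<std::uint32_t>(mask) & static_cast<std::uint32_t>(t)) != 0;
    }

    // A formatted load plus a raw 32-bit access on the same sharp yields one binding
    // whose mask requests several aliased element types.
    constexpr Type used = (Type::U32 | Type::F32 | Type::U16 | Type::U8) // formatted access
                          | Type::U32;                                   // raw 32-bit access

    static_assert(Uses(used, Type::F32) && Uses(used, Type::U8));
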
@@ -74,7 +74,14 @@ void Visit(Info& info, const IR::Inst& inst) {
         info.uses_lane_id = true;
         break;
     case IR::Opcode::ReadConst:
+        if (!info.has_readconst) {
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::ReadConstUbo,
+            });
             info.has_readconst = true;
+        }
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;

@@ -88,10 +95,9 @@ void Visit(Info& info, const IR::Inst& inst) {
 }

 void CollectShaderInfoPass(IR::Program& program) {
-    Info& info{program.info};
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
-            Visit(info, inst);
+            Visit(program.info, inst);
         }
     }
 }

@@ -8,37 +8,46 @@

 namespace Shader::Optimization {

+static bool IsLoadShared(const IR::Inst& inst) {
+    return inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
+           inst.GetOpcode() == IR::Opcode::LoadSharedU64;
+}
+
+static bool IsWriteShared(const IR::Inst& inst) {
+    return inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
+           inst.GetOpcode() == IR::Opcode::WriteSharedU64;
+}
+
+// Inserts barriers when a shared memory write and read occur in the same basic block.
 static void EmitBarrierInBlock(IR::Block* block) {
-    // This is inteded to insert a barrier when shared memory write and read
-    // occur in the same basic block. Also checks if branch depth is zero as
-    // we don't want to insert barrier in potentially divergent code.
-    bool emit_barrier_on_write = false;
-    bool emit_barrier_on_read = false;
-    const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) {
-        if (emit_cond) {
-            IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
-            ir.Barrier();
-            emit_cond = false;
-        }
-    };
+    enum class BarrierAction : u32 {
+        None,
+        BarrierOnWrite,
+        BarrierOnRead,
+    };
+    BarrierAction action{};
     for (IR::Inst& inst : block->Instructions()) {
-        if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
-            inst.GetOpcode() == IR::Opcode::LoadSharedU64) {
-            emit_barrier(emit_barrier_on_read, inst);
-            emit_barrier_on_write = true;
+        if (IsLoadShared(inst)) {
+            if (action == BarrierAction::BarrierOnRead) {
+                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
+                ir.Barrier();
+            }
+            action = BarrierAction::BarrierOnWrite;
+            continue;
         }
-        if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
-            inst.GetOpcode() == IR::Opcode::WriteSharedU64) {
-            emit_barrier(emit_barrier_on_write, inst);
-            emit_barrier_on_read = true;
+        if (IsWriteShared(inst)) {
+            if (action == BarrierAction::BarrierOnWrite) {
+                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
+                ir.Barrier();
+            }
+            action = BarrierAction::BarrierOnRead;
         }
     }
 }

+// Inserts a barrier after divergent conditional blocks to avoid undefined
+// behavior when some threads write and others read from shared memory.
 static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
-    // Insert a barrier after divergent conditional blocks.
-    // This avoids potential softlocks and crashes when some threads
-    // initialize shared memory and others read from it.
     const IR::U1 cond = data.if_node.cond;
     const auto insert_barrier =
         IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {

@@ -56,8 +65,21 @@ static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
     }
 }

-void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
-    if (!program.info.uses_shared || !profile.needs_lds_barriers) {
+static constexpr u32 GcnSubgroupSize = 64;
+
+void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_info,
+                             const Profile& profile) {
+    if (program.info.stage != Stage::Compute) {
+        return;
+    }
+    const auto& cs_info = runtime_info.cs_info;
+    const u32 shared_memory_size = cs_info.shared_memory_size;
+    const u32 threadgroup_size =
+        cs_info.workgroup_size[0] * cs_info.workgroup_size[1] * cs_info.workgroup_size[2];
+    // The compiler can only omit barriers when the local workgroup size is the same as the HW
+    // subgroup.
+    if (shared_memory_size == 0 || threadgroup_size != GcnSubgroupSize ||
+        !profile.needs_lds_barriers) {
         return;
     }
     using Type = IR::AbstractSyntaxNode::Type;

@@ -67,6 +89,8 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
             --branch_depth;
             continue;
         }
+        // Check if branch depth is zero, we don't want to insert barrier in potentially divergent
+        // code.
         if (node.type == Type::If && branch_depth++ == 0) {
             EmitBarrierInMergeBlock(node.data);
             continue;

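Note (reasoning recap with a stand-in check): the pass now re-inserts barriers only for compute shaders that use LDS and whose whole workgroup is a single 64-lane GCN wave, since that is the case where the guest compiler could legally omit them; hosts with smaller subgroups then need the barriers back. A minimal predicate under those assumptions:

    // Illustrative predicate mirroring the early-out above; stand-in types only.
    #include <array>
    #include <cstdint>

    constexpr std::uint32_t GcnSubgroupSize = 64;

    constexpr bool NeedsLdsBarriers(std::uint32_t shared_memory_size,
                                    std::array<std::uint32_t, 3> workgroup_size,
                                    bool profile_needs_lds_barriers) {
        const std::uint32_t threads = workgroup_size[0] * workgroup_size[1] * workgroup_size[2];
        return shared_memory_size != 0 && threads == GcnSubgroupSize && profile_needs_lds_barriers;
    }

    static_assert(NeedsLdsBarriers(1024, {64, 1, 1}, true));
    static_assert(!NeedsLdsBarriers(1024, {8, 8, 2}, true));
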
@@ -0,0 +1,117 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/ir/ir_emitter.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Optimization {
+
+static bool IsSharedAccess(const IR::Inst& inst) {
+    const auto opcode = inst.GetOpcode();
+    switch (opcode) {
+    case IR::Opcode::LoadSharedU32:
+    case IR::Opcode::LoadSharedU64:
+    case IR::Opcode::WriteSharedU32:
+    case IR::Opcode::WriteSharedU64:
+    case IR::Opcode::SharedAtomicAnd32:
+    case IR::Opcode::SharedAtomicIAdd32:
+    case IR::Opcode::SharedAtomicOr32:
+    case IR::Opcode::SharedAtomicSMax32:
+    case IR::Opcode::SharedAtomicUMax32:
+    case IR::Opcode::SharedAtomicSMin32:
+    case IR::Opcode::SharedAtomicUMin32:
+    case IR::Opcode::SharedAtomicXor32:
+        return true;
+    default:
+        return false;
+    }
+}
+
+void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_info,
+                               const Profile& profile) {
+    if (program.info.stage != Stage::Compute) {
+        return;
+    }
+    // Only perform the transform if the host shared memory is insufficient.
+    const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
+    if (shared_memory_size <= profile.max_shared_memory_size) {
+        return;
+    }
+    // Add buffer binding for shared memory storage buffer.
+    const u32 binding = static_cast<u32>(program.info.buffers.size());
+    program.info.buffers.push_back({
+        .used_types = IR::Type::U32,
+        .inline_cbuf = AmdGpu::Buffer::Null(),
+        .buffer_type = BufferType::SharedMemory,
+        .is_written = true,
+    });
+    for (IR::Block* const block : program.blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            if (!IsSharedAccess(inst)) {
+                continue;
+            }
+            IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
+            const IR::U32 handle = ir.Imm32(binding);
+            // Replace shared atomics first
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::SharedAtomicAnd32:
+                inst.ReplaceUsesWithAndRemove(
+                    ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
+                continue;
+            case IR::Opcode::SharedAtomicIAdd32:
+                inst.ReplaceUsesWithAndRemove(
+                    ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
+                continue;
+            case IR::Opcode::SharedAtomicOr32:
+                inst.ReplaceUsesWithAndRemove(
+                    ir.BufferAtomicOr(handle, inst.Arg(0), inst.Arg(1), {}));
+                continue;
+            case IR::Opcode::SharedAtomicSMax32:
+            case IR::Opcode::SharedAtomicUMax32: {
+                const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
+                inst.ReplaceUsesWithAndRemove(
+                    ir.BufferAtomicIMax(handle, inst.Arg(0), inst.Arg(1), is_signed, {}));
+                continue;
+            }
+            case IR::Opcode::SharedAtomicSMin32:
+            case IR::Opcode::SharedAtomicUMin32: {
+                const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
+                inst.ReplaceUsesWithAndRemove(
+                    ir.BufferAtomicIMin(handle, inst.Arg(0), inst.Arg(1), is_signed, {}));
+                continue;
+            }
+            case IR::Opcode::SharedAtomicXor32:
+                inst.ReplaceUsesWithAndRemove(
+                    ir.BufferAtomicXor(handle, inst.Arg(0), inst.Arg(1), {}));
+                continue;
+            default:
+                break;
+            }
+            // Replace shared operations.
+            const IR::U32 offset = ir.IMul(ir.GetAttributeU32(IR::Attribute::WorkgroupIndex),
+                                           ir.Imm32(shared_memory_size));
+            const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::LoadSharedU32:
+                inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
+                break;
+            case IR::Opcode::LoadSharedU64:
+                inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
+                break;
+            case IR::Opcode::WriteSharedU32:
+                ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
+                inst.Invalidate();
+                break;
+            case IR::Opcode::WriteSharedU64:
+                ir.StoreBufferU32(2, handle, address, inst.Arg(1), {});
+                inst.Invalidate();
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization

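Note (sketch of the address math above, with hypothetical names): the pass rebases every LDS access by workgroup_index * shared_memory_size, so each workgroup owns one contiguous slice of the emulation storage buffer, and the backing allocation plausibly needs about num_workgroups * shared_memory_size bytes.

    // Stand-alone sketch of the rebasing performed by SharedMemoryToStoragePass.
    #include <cstdint>

    constexpr std::uint64_t SsboAddress(std::uint32_t workgroup_index, std::uint32_t lds_offset,
                                        std::uint32_t shared_memory_size) {
        // Each workgroup gets its own shared_memory_size-sized slice.
        return std::uint64_t{workgroup_index} * shared_memory_size + lds_offset;
    }

    constexpr std::uint64_t SsboSize(std::uint32_t num_workgroups, std::uint32_t shared_memory_size) {
        // Rough upper bound on the backing allocation for one dispatch.
        return std::uint64_t{num_workgroups} * shared_memory_size;
    }

    static_assert(SsboAddress(3, 16, 0x8000) == 3 * 0x8000 + 16);
    static_assert(SsboSize(120, 0x8000) == 120ull * 0x8000);
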
@@ -65,10 +65,6 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     // Run optimization passes
     const auto stage = program.info.stage;

-    if (stage == Stage::Fragment) {
-        // Before SSA pass, as it will rewrite to VGPR load/store.
-        Shader::Optimization::LowerSharedMemToRegisters(program, runtime_info);
-    }
     Shader::Optimization::SsaRewritePass(program.post_order_blocks);
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     if (info.l_stage == LogicalStage::TessellationControl) {

@@ -90,11 +86,12 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::FlattenExtendedUserdataPass(program);
     Shader::Optimization::ResourceTrackingPass(program);
     Shader::Optimization::LowerBufferFormatToRaw(program);
+    Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);
+    Shader::Optimization::SharedMemoryBarrierPass(program, runtime_info, profile);
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
     Shader::Optimization::CollectShaderInfoPass(program);
-    Shader::Optimization::SharedMemoryBarrierPass(program, profile);

     return program;
 }

@@ -167,6 +167,17 @@ enum class MrtSwizzle : u8 {
 };
 static constexpr u32 MaxColorBuffers = 8;

+struct PsColorBuffer {
+    AmdGpu::NumberFormat num_format : 4;
+    AmdGpu::NumberConversion num_conversion : 2;
+    AmdGpu::Liverpool::ShaderExportFormat export_format : 4;
+    u32 needs_unorm_fixup : 1;
+    u32 pad : 21;
+    AmdGpu::CompMapping swizzle;
+
+    auto operator<=>(const PsColorBuffer&) const noexcept = default;
+};
+
 struct FragmentRuntimeInfo {
     struct PsInput {
         u8 param_index;

@@ -184,15 +195,6 @@ struct FragmentRuntimeInfo {
     AmdGpu::Liverpool::PsInput addr_flags;
     u32 num_inputs;
     std::array<PsInput, 32> inputs;
-    struct PsColorBuffer {
-        AmdGpu::NumberFormat num_format;
-        AmdGpu::NumberConversion num_conversion;
-        AmdGpu::CompMapping swizzle;
-        AmdGpu::Liverpool::ShaderExportFormat export_format;
-        bool needs_unorm_fixup;
-
-        auto operator<=>(const PsColorBuffer&) const noexcept = default;
-    };
     std::array<PsColorBuffer, MaxColorBuffers> color_buffers;

     bool operator==(const FragmentRuntimeInfo& other) const noexcept {

@@ -264,3 +266,14 @@ struct RuntimeInfo {
 };

 } // namespace Shader
+
+template <>
+struct fmt::formatter<Shader::Stage> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    auto format(const Shader::Stage stage, format_context& ctx) const {
+        constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
+        return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
+    }
+};

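Note (packing check with a throwaway mock; the real enums are assumed to fit the declared widths): the scalar members of the new PsColorBuffer are bit-fields that fill exactly one 32-bit word (4 + 2 + 4 + 1 + 21 = 32), with the component swizzle stored alongside.

    // Mock only; the real struct uses the AmdGpu enums shown in the diff.
    #include <cstdint>

    struct PackedColorBufferMock {
        std::uint32_t num_format : 4;
        std::uint32_t num_conversion : 2;
        std::uint32_t export_format : 4;
        std::uint32_t needs_unorm_fixup : 1;
        std::uint32_t pad : 21;
    };

    static_assert(4 + 2 + 4 + 1 + 21 == 32);
    static_assert(sizeof(PackedColorBufferMock) == sizeof(std::uint32_t),
                  "fits one 32-bit word on common ABIs");
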
@@ -98,12 +98,6 @@ struct StageSpecialization {
             });
         }
         u32 binding{};
-        if (info->has_emulated_shared_memory) {
-            binding++;
-        }
-        if (info->has_readconst) {
-            binding++;
-        }
         ForEachSharp(binding, buffers, info->buffers,
                      [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.stride = sharp.GetStride();

@@ -195,18 +189,6 @@ struct StageSpecialization {
             }
         }
         u32 binding{};
-        if (info->has_emulated_shared_memory != other.info->has_emulated_shared_memory) {
-            return false;
-        }
-        if (info->has_readconst != other.info->has_readconst) {
-            return false;
-        }
-        if (info->has_emulated_shared_memory) {
-            binding++;
-        }
-        if (info->has_readconst) {
-            binding++;
-        }
         for (u32 i = 0; i < buffers.size(); i++) {
             if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
                 return false;

@@ -197,6 +197,10 @@ struct Liverpool {
             return settings.lds_dwords.Value() * 128 * 4;
         }

+        u32 NumWorkgroups() const noexcept {
+            return dim_x * dim_y * dim_z;
+        }
+
         bool IsTgidEnabled(u32 i) const noexcept {
             return (settings.tgid_enable.Value() >> i) & 1;
         }

@@ -31,6 +31,12 @@ struct Buffer {
     u32 _padding1 : 6;
     u32 type : 2; // overlaps with T# type, so should be 0 for buffer

+    static constexpr Buffer Null() {
+        Buffer buffer{};
+        buffer.base_address = 1;
+        return buffer;
+    }
+
     bool Valid() const {
         return type == 0u;
     }

@@ -183,7 +183,7 @@ enum class NumberFormat : u32 {
     Ubscaled = 13,
 };

-enum class CompSwizzle : u32 {
+enum class CompSwizzle : u8 {
     Zero = 0,
     One = 1,
     Red = 4,

@@ -193,10 +193,10 @@ enum class CompSwizzle : u32 {
 };

 enum class NumberConversion : u32 {
-    None,
-    UintToUscaled,
-    SintToSscaled,
-    UnormToUbnorm,
+    None = 0,
+    UintToUscaled = 1,
+    SintToSscaled = 2,
+    UnormToUbnorm = 3,
 };

 struct CompMapping {

@@ -168,7 +168,7 @@ public:
     void Commit();

     /// Maps and commits a memory region with user provided data
-    u64 Copy(VAddr src, size_t size, size_t alignment = 0) {
+    u64 Copy(auto src, size_t size, size_t alignment = 0) {
         const auto [data, offset] = Map(size, alignment);
         std::memcpy(data, reinterpret_cast<const void*>(src), size);
         Commit();

@@ -5,11 +5,8 @@
 #include "common/alignment.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
-#include "shader_recompiler/frontend/fetch_shader.h"
-#include "shader_recompiler/info.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"

@@ -18,8 +15,8 @@
 namespace VideoCore {

 static constexpr size_t DataShareBufferSize = 64_KB;
-static constexpr size_t StagingBufferSize = 1_GB;
-static constexpr size_t UboStreamBufferSize = 64_MB;
+static constexpr size_t StagingBufferSize = 512_MB;
+static constexpr size_t UboStreamBufferSize = 128_MB;

 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                          AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,

@@ -29,10 +26,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
-      lds_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, DataShareBufferSize},
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
-    Vulkan::SetObjectName(instance.GetDevice(), lds_buffer.Handle(), "LDS Buffer");

     // Ensure the first slot is used for the null buffer
     const auto null_id =

@@ -251,14 +246,6 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
     });
 }

-std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
-    static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
-    ASSERT(data.size_bytes() <= StreamThreshold);
-    const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
-                                          instance.UniformMinAlignment());
-    return {&stream_buffer, offset};
-}
-
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
                                                   bool is_texel_buffer, BufferId buffer_id) {
     // For small uniform buffers that have not been modified by gpu

@@ -68,9 +68,9 @@ public:
         return &gds_buffer;
     }

-    /// Returns a pointer to LDS device local buffer.
-    [[nodiscard]] const Buffer* GetLdsBuffer() const noexcept {
-        return &lds_buffer;
+    /// Retrieves the host visible device local stream buffer.
+    [[nodiscard]] StreamBuffer& GetStreamBuffer() noexcept {
+        return stream_buffer;
     }

     /// Retrieves the buffer with the specified id.

@@ -90,8 +90,6 @@ public:
     /// Writes a value to GPU buffer.
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

-    [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
-
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                        bool is_texel_buffer = false,

@@ -159,7 +157,6 @@ private:
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     Buffer gds_buffer;
-    Buffer lds_buffer;
     std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;

@@ -3,11 +3,9 @@

 #include <boost/container/small_vector.hpp>

-#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/texture_cache/texture_cache.h"

 namespace Vulkan {

@@ -29,23 +27,6 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,

     u32 binding{};
     boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
-
-    if (info->has_emulated_shared_memory) {
-        bindings.push_back({
-            .binding = binding++,
-            .descriptorType = vk::DescriptorType::eStorageBuffer,
-            .descriptorCount = 1,
-            .stageFlags = vk::ShaderStageFlagBits::eCompute,
-        });
-    }
-    if (info->has_readconst) {
-        bindings.push_back({
-            .binding = binding++,
-            .descriptorType = vk::DescriptorType::eUniformBuffer,
-            .descriptorCount = 1,
-            .stageFlags = vk::ShaderStageFlagBits::eCompute,
-        });
-    }
     for (const auto& buffer : info->buffers) {
         const auto sharp = buffer.GetSharp(*info);
         bindings.push_back({

@@ -7,18 +7,13 @@
 #include <boost/container/static_vector.hpp>

 #include "common/assert.h"
-#include "common/io_file.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
-#include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
-#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
-#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
-#include "video_core/texture_cache/texture_cache.h"

 namespace Vulkan {

@@ -357,14 +352,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
         if (!stage) {
             continue;
         }
-        if (stage->has_readconst) {
-            bindings.push_back({
-                .binding = binding++,
-                .descriptorType = vk::DescriptorType::eUniformBuffer,
-                .descriptorCount = 1,
-                .stageFlags = gp_stage_flags,
-            });
-        }
         for (const auto& buffer : stage->buffers) {
             const auto sharp = buffer.GetSharp(*stage);
             bindings.push_back({

@@ -35,8 +35,7 @@ struct GraphicsPipelineKey {
     std::array<size_t, MaxShaderStages> stage_hashes;
     u32 num_color_attachments;
    std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
-    std::array<Shader::FragmentRuntimeInfo::PsColorBuffer, Liverpool::NumColorBuffers>
-        color_buffers;
+    std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
     vk::Format depth_format;
     vk::Format stencil_format;

@ -1,14 +1,11 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <ranges>
|
|
||||||
#include <span>
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include <fmt/ranges.h>
|
#include <fmt/ranges.h>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
#include "sdl_window.h"
|
#include "sdl_window.h"
|
||||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||||
|
@ -206,10 +203,9 @@ std::string Instance::GetDriverVersionName() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Instance::CreateDevice() {
|
bool Instance::CreateDevice() {
|
||||||
const vk::StructureChain feature_chain =
|
const vk::StructureChain feature_chain = physical_device.getFeatures2<
|
||||||
physical_device
|
vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan11Features,
|
||||||
.getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan12Features,
|
vk::PhysicalDeviceVulkan12Features, vk::PhysicalDeviceRobustness2FeaturesEXT,
|
||||||
vk::PhysicalDeviceRobustness2FeaturesEXT,
|
|
||||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
||||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||||
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
|
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
|
||||||
|
@ -319,6 +315,7 @@ bool Instance::CreateDevice() {
|
||||||
|
|
||||||
const auto topology_list_restart_features =
|
const auto topology_list_restart_features =
|
||||||
feature_chain.get<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
|
feature_chain.get<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
|
||||||
|
const auto vk11_features = feature_chain.get<vk::PhysicalDeviceVulkan11Features>();
|
||||||
const auto vk12_features = feature_chain.get<vk::PhysicalDeviceVulkan12Features>();
|
const auto vk12_features = feature_chain.get<vk::PhysicalDeviceVulkan12Features>();
|
||||||
vk::StructureChain device_chain = {
|
vk::StructureChain device_chain = {
|
||||||
vk::DeviceCreateInfo{
|
vk::DeviceCreateInfo{
|
||||||
|
@@ -351,12 +348,17 @@ bool Instance::CreateDevice() {
             },
         },
         vk::PhysicalDeviceVulkan11Features{
-            .shaderDrawParameters = true,
+            .storageBuffer16BitAccess = vk11_features.storageBuffer16BitAccess,
+            .uniformAndStorageBuffer16BitAccess = vk11_features.uniformAndStorageBuffer16BitAccess,
+            .shaderDrawParameters = vk11_features.shaderDrawParameters,
         },
         vk::PhysicalDeviceVulkan12Features{
             .samplerMirrorClampToEdge = vk12_features.samplerMirrorClampToEdge,
             .drawIndirectCount = vk12_features.drawIndirectCount,
+            .storageBuffer8BitAccess = vk12_features.storageBuffer8BitAccess,
+            .uniformAndStorageBuffer8BitAccess = vk12_features.uniformAndStorageBuffer8BitAccess,
             .shaderFloat16 = vk12_features.shaderFloat16,
+            .shaderInt8 = vk12_features.shaderInt8,
             .scalarBlockLayout = vk12_features.scalarBlockLayout,
             .uniformBufferStandardLayout = vk12_features.uniformBufferStandardLayout,
             .separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
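The device-creation changes above stop hardcoding feature values (previously only .shaderDrawParameters = true) and instead copy each requested Vulkan 1.1/1.2 feature from what the physical device reports, so 8-bit and 16-bit storage access is only requested when the driver actually advertises it. A minimal standalone sketch of that pattern using stock vulkan.hpp types (not the emulator's own code; the helper name is made up for illustration):

#include <vulkan/vulkan.hpp>

// Query the device's own support and request only what it reports, mirroring the
// pattern in CreateDevice() above: copying the queried booleans guarantees
// vkCreateDevice is never asked for a feature the driver did not advertise.
vk::PhysicalDeviceVulkan12Features MakeRequested12(vk::PhysicalDevice gpu) {
    const auto chain = gpu.getFeatures2<vk::PhysicalDeviceFeatures2,
                                        vk::PhysicalDeviceVulkan12Features>();
    const auto& supported = chain.get<vk::PhysicalDeviceVulkan12Features>();
    vk::PhysicalDeviceVulkan12Features requested{};
    requested.storageBuffer8BitAccess = supported.storageBuffer8BitAccess;
    requested.uniformAndStorageBuffer8BitAccess = supported.uniformAndStorageBuffer8BitAccess;
    requested.shaderInt8 = supported.shaderInt8;
    return requested;
}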
@@ -345,12 +345,12 @@ bool PipelineCache::RefreshGraphicsKey() {
 
         key.color_formats[remapped_cb] =
             LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
-        key.color_buffers[remapped_cb] = {
+        key.color_buffers[remapped_cb] = Shader::PsColorBuffer{
             .num_format = col_buf.GetNumberFmt(),
             .num_conversion = col_buf.GetNumberConversion(),
-            .swizzle = col_buf.Swizzle(),
             .export_format = regs.color_export_format.GetFormat(cb),
             .needs_unorm_fixup = needs_unorm_fixup,
+            .swizzle = col_buf.Swizzle(),
         };
     }
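The .swizzle initializer moves below .needs_unorm_fixup because C++20 designated initializers must appear in member declaration order, which suggests swizzle is now declared after needs_unorm_fixup in Shader::PsColorBuffer. A standalone illustration with a hypothetical struct:

// Hypothetical struct, for illustration only.
struct ColorBufferSpec {
    int num_format;
    bool needs_unorm_fixup;
    int swizzle; // declared last
};

// OK: initializers appear in declaration order.
ColorBufferSpec ok{.num_format = 1, .needs_unorm_fixup = false, .swizzle = 2};

// Ill-formed in C++20: .swizzle may not precede .needs_unorm_fixup.
// ColorBufferSpec bad{.num_format = 1, .swizzle = 2, .needs_unorm_fixup = false};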
@@ -19,6 +19,20 @@
 
 namespace Vulkan {
 
+static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
+    Shader::PushData push_data{};
+    push_data.step0 = regs.vgt_instance_step_rate_0;
+    push_data.step1 = regs.vgt_instance_step_rate_1;
+
+    // TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
+    // is encountered and implemented in the recompiler.
+    push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
+    push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
+    push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
+    push_data.yscale = regs.viewport_control.yscale_enable ? regs.viewports[0].yscale : 1.f;
+    return push_data;
+}
+
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
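MakeUserData gathers the per-draw user data that BindResources previously filled in inline (see the next hunk). A rough sketch of just the fields it writes, assuming a reduced stand-in for Shader::PushData, which in reality carries additional members:

#include <cstdint>

// Assumed shape, reduced to the fields MakeUserData touches above.
struct PushDataSketch {
    std::uint32_t step0; // instance step rate 0
    std::uint32_t step1; // instance step rate 1
    float xoffset = 0.f, xscale = 1.f; // viewport transform; identity when the enable bits are clear
    float yoffset = 0.f, yscale = 1.f;
};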
@@ -426,95 +440,69 @@ void Rasterizer::Finish() {
 }
 
 bool Rasterizer::BindResources(const Pipeline* pipeline) {
+    if (IsComputeMetaClear(pipeline)) {
+        return false;
+    }
+
+    set_writes.clear();
+    buffer_barriers.clear();
     buffer_infos.clear();
     buffer_views.clear();
     image_infos.clear();
 
-    const auto& regs = liverpool->regs;
-
-    if (pipeline->IsCompute()) {
-        const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
-
-        // Assume if a shader reads and writes metas at the same time, it is a copy shader.
-        bool meta_read = false;
-        for (const auto& desc : info.buffers) {
-            if (desc.is_gds_buffer) {
-                continue;
-            }
-            if (!desc.is_written) {
-                const VAddr address = desc.GetSharp(info).base_address;
-                meta_read = texture_cache.IsMeta(address);
-            }
-        }
-
-        // Most of the time when a metadata is updated with a shader it gets cleared. It means
-        // we can skip the whole dispatch and update the tracked state instead. Also, it is not
-        // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
-        // will need its full emulation anyways. For cases of metadata read a warning will be
-        // logged.
-        if (!meta_read) {
-            for (const auto& desc : info.buffers) {
-                const auto sharp = desc.GetSharp(info);
-                const VAddr address = sharp.base_address;
-                if (desc.is_written) {
-                    // Assume all slices were updates
-                    if (texture_cache.ClearMeta(address)) {
-                        LOG_TRACE(Render_Vulkan, "Metadata update skipped");
-                        return false;
-                    }
-                } else {
-                    if (texture_cache.IsMeta(address)) {
-                        LOG_WARNING(Render_Vulkan,
-                                    "Unexpected metadata read by a CS shader (buffer)");
-                    }
-                }
-            }
-        }
-    }
-
-    set_writes.clear();
-    buffer_barriers.clear();
-
     // Bind resource buffers and textures.
-    Shader::PushData push_data{};
     Shader::Backend::Bindings binding{};
-
+    Shader::PushData push_data = MakeUserData(liverpool->regs);
     for (const auto* stage : pipeline->GetStages()) {
         if (!stage) {
             continue;
        }
-        push_data.step0 = regs.vgt_instance_step_rate_0;
-        push_data.step1 = regs.vgt_instance_step_rate_1;
-
-        // TODO(roamic): add support for multiple viewports and geometry shaders when ViewportIndex
-        // is encountered and implemented in the recompiler.
-        if (stage->stage == Shader::Stage::Vertex) {
-            push_data.xoffset =
-                regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
-            push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
-            push_data.yoffset =
-                regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
-            push_data.yscale = regs.viewport_control.yscale_enable ? regs.viewports[0].yscale : 1.f;
-        }
         stage->PushUd(binding, push_data);
-
-        BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);
-        BindTextures(*stage, binding, set_writes);
+        BindBuffers(*stage, binding, push_data);
+        BindTextures(*stage, binding);
     }
 
     pipeline->BindResources(set_writes, buffer_barriers, push_data);
-
     return true;
 }
 
+bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) {
+    if (!pipeline->IsCompute()) {
+        return false;
+    }
+
+    const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
+
+    // Assume if a shader reads and writes metas at the same time, it is a copy shader.
+    for (const auto& desc : info.buffers) {
+        const VAddr address = desc.GetSharp(info).base_address;
+        if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) {
+            return false;
+        }
+    }
+
+    // Most of the time when a metadata is updated with a shader it gets cleared. It means
+    // we can skip the whole dispatch and update the tracked state instead. Also, it is not
+    // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
+    // will need its full emulation anyways.
+    for (const auto& desc : info.buffers) {
+        const VAddr address = desc.GetSharp(info).base_address;
+        if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) {
+            // Assume all slices were updates
+            LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+            return true;
+        }
+    }
+    return false;
+}
+
 void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
-                             Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes,
-                             Pipeline::BufferBarriers& buffer_barriers) {
+                             Shader::PushData& push_data) {
     buffer_bindings.clear();
 
     for (const auto& desc : stage.buffers) {
         const auto vsharp = desc.GetSharp(stage);
-        if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
+        if (!desc.IsSpecial() && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
             const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
             buffer_bindings.emplace_back(buffer_id, vsharp);
         } else {
@@ -522,47 +510,30 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
         }
     }
 
-    // Bind a SSBO to act as shared memory in case of not being able to use a workgroup buffer
-    // (e.g. when the compute shared memory is bigger than the GPU's shared memory)
-    if (stage.has_emulated_shared_memory) {
-        const auto* lds_buf = buffer_cache.GetLdsBuffer();
-        buffer_infos.emplace_back(lds_buf->Handle(), 0, lds_buf->SizeBytes());
-        set_writes.push_back({
-            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding.unified++,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = vk::DescriptorType::eStorageBuffer,
-            .pBufferInfo = &buffer_infos.back(),
-        });
-        ++binding.buffer;
-    }
-
-    // Bind the flattened user data buffer as a UBO so it's accessible to the shader
-    if (stage.has_readconst) {
-        const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
-        buffer_infos.emplace_back(vk_buffer->Handle(), offset,
-                                  stage.flattened_ud_buf.size() * sizeof(u32));
-        set_writes.push_back({
-            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding.unified++,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = vk::DescriptorType::eUniformBuffer,
-            .pBufferInfo = &buffer_infos.back(),
-        });
-        ++binding.buffer;
-    }
-
     // Second pass to re-bind buffers that were updated after binding
     for (u32 i = 0; i < buffer_bindings.size(); i++) {
         const auto& [buffer_id, vsharp] = buffer_bindings[i];
         const auto& desc = stage.buffers[i];
         const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
+        // Buffer is not from the cache, either a special buffer or unbound.
         if (!buffer_id) {
-            if (desc.is_gds_buffer) {
+            if (desc.buffer_type == Shader::BufferType::GdsBuffer) {
                 const auto* gds_buf = buffer_cache.GetGdsBuffer();
                 buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
+            } else if (desc.buffer_type == Shader::BufferType::ReadConstUbo) {
+                auto& vk_buffer = buffer_cache.GetStreamBuffer();
+                const u32 ubo_size = stage.flattened_ud_buf.size() * sizeof(u32);
+                const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
+                                                  instance.UniformMinAlignment());
+                buffer_infos.emplace_back(vk_buffer.Handle(), offset, ubo_size);
+            } else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
+                auto& lds_buffer = buffer_cache.GetStreamBuffer();
+                const auto& cs_program = liverpool->GetCsRegs();
+                const auto lds_size = cs_program.SharedMemSize() * cs_program.NumWorkgroups();
+                const auto [data, offset] =
+                    lds_buffer.Map(lds_size, instance.StorageMinAlignment());
+                std::memset(data, 0, lds_size);
+                buffer_infos.emplace_back(lds_buffer.Handle(), offset, lds_size);
             } else if (instance.IsNullDescriptorSupported()) {
                 buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
             } else {
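The fallback paths above now dispatch on desc.buffer_type and skip cache lookups for anything desc.IsSpecial(). A sketch of how such a classification could look, using only the enumerators visible in this hunk plus an assumed default kind (names beyond those in the diff are guesses):

// Sketch only: treat anything other than a regular guest buffer as "special",
// i.e. not served by the buffer cache and bound through a dedicated path instead.
enum class BufferType {
    Guest,        // ordinary V# backed by guest memory (assumed name)
    GdsBuffer,    // global data share
    ReadConstUbo, // flattened user-data constants
    SharedMemory, // LDS emulated through a storage buffer
};

struct BufferResourceSketch {
    BufferType buffer_type{BufferType::Guest};
    bool is_written{};

    bool IsSpecial() const {
        return buffer_type != BufferType::Guest;
    }
};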
@@ -605,8 +576,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
     }
 }
 
-void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
-                              Pipeline::DescriptorWrites& set_writes) {
+void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding) {
     image_bindings.clear();
 
     for (const auto& image_desc : stage.images) {
@@ -81,11 +81,9 @@ private:
    bool FilterDraw();
 
    void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
-                    Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes,
-                    Pipeline::BufferBarriers& buffer_barriers);
+                    Shader::PushData& push_data);
 
-   void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
-                     Pipeline::DescriptorWrites& set_writes);
+   void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding);
 
    bool BindResources(const Pipeline* pipeline);
    void ResetBindings() {
@@ -95,6 +93,8 @@ private:
        bound_images.clear();
    }
 
+   bool IsComputeMetaClear(const Pipeline* pipeline);
+
 private:
    const Instance& instance;
    Scheduler& scheduler;