mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-13 05:05:57 +00:00
shader_recompiler: Remove special case buffers and add support for aliasing (#2428)
* shader_recompiler: Move shared mem lowering into emitter * IR can be quite verbose during first stages of translation, before ssa and constant prop passes have run that drastically simplify it. This lowering can also be done during emission so why not do it then to save some compilation time * runtime_info: Pack PsColorBuffer into 8 bytes * Drops the size of the total structure by half from 396 to 204 bytes. Also should make comparison of the array a bit faster, since its a hot path done every draw * emit_spirv_context: Add infrastructure for buffer aliases * Splits out the buffer creation function so it can be reused when defining multiple type aliases * shader_recompiler: Merge srt_flatbuf into buffers list * Its no longer a special case, yay * shader_recompiler: Complete buffer aliasing support * Add a bunch more types into buffers, such as F32 for float reads/writes and 8/16 bit integer types for formatted buffers * shader_recompiler: Remove existing shared memory emulation * The current impl relies on backend side implementaton and hooking into every shared memory access. It also doesnt handle atomics. Will be replaced by an IR pass that solves these issues * shader_recompiler: Reintroduce shared memory on ssbo emulation * Now it is performed with an IR pass, and combined with the previous commit cleanup, is fully transparent from the backend, other than requiring workgroup_index be provided as an attribute (computing this on every shared memory access is gonna be too verbose * clang format * buffer_cache: Reduce buffer sizes * vk_rasterizer: Cleanup resource binding code * Reduce noise in the functions, also remove some arguments which are class members * Fix gcc
This commit is contained in:
parent
290e127a4f
commit
82cacec8eb
36 changed files with 675 additions and 625 deletions
|
@ -242,14 +242,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||
ctx.AddCapability(spv::Capability::Image1D);
|
||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
ctx.AddCapability(spv::Capability::Int8);
|
||||
ctx.AddCapability(spv::Capability::Int16);
|
||||
ctx.AddCapability(spv::Capability::Int64);
|
||||
ctx.AddCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
|
||||
ctx.AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
|
||||
if (info.uses_fp16) {
|
||||
ctx.AddCapability(spv::Capability::Float16);
|
||||
ctx.AddCapability(spv::Capability::Int16);
|
||||
}
|
||||
if (info.uses_fp64) {
|
||||
ctx.AddCapability(spv::Capability::Float64);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::Int64);
|
||||
if (info.has_storage_images) {
|
||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||
|
|
|
@ -23,10 +23,13 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
|
|||
|
||||
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
}
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
|
||||
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
|
||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
|
||||
}
|
||||
|
@ -165,17 +168,17 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
|
|||
}
|
||||
|
||||
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
|
||||
auto& buffer = ctx.buffers[binding];
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value,
|
||||
ctx.ConstU32(gds_addr));
|
||||
const auto& buffer = ctx.buffers[binding];
|
||||
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
|
||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
|
||||
}
|
||||
|
||||
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
|
||||
auto& buffer = ctx.buffers[binding];
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value,
|
||||
ctx.ConstU32(gds_addr));
|
||||
const auto& buffer = ctx.buffers[binding];
|
||||
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
|
||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
|
||||
}
|
||||
|
|
|
@ -160,21 +160,25 @@ void EmitGetGotoVariable(EmitContext&) {
|
|||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
using BufferAlias = EmitContext::BufferAlias;
|
||||
|
||||
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
|
||||
u32 flatbuf_off_dw = inst->Flags<u32>();
|
||||
ASSERT(ctx.srt_flatbuf.binding >= 0);
|
||||
ASSERT(flatbuf_off_dw > 0);
|
||||
Id index = ctx.ConstU32(flatbuf_off_dw);
|
||||
auto& buffer = ctx.srt_flatbuf;
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
const u32 flatbuf_off_dw = inst->Flags<u32>();
|
||||
const auto& srt_flatbuf = ctx.buffers.back();
|
||||
ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
|
||||
srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
|
||||
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
|
||||
return ctx.OpLoad(ctx.U32[1], ptr);
|
||||
}
|
||||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
const auto [id, pointer_type] = buffer[BufferAlias::U32];
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(ctx.U32[1], ptr);
|
||||
}
|
||||
|
||||
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
||||
|
@ -184,7 +188,7 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
|||
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||
}
|
||||
|
||||
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
||||
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
||||
if (IR::IsPosition(attr)) {
|
||||
ASSERT(attr == IR::Attribute::Position0);
|
||||
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||
|
@ -285,6 +289,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
return EmitReadStepRate(ctx, 0);
|
||||
case IR::Attribute::InstanceId1:
|
||||
return EmitReadStepRate(ctx, 1);
|
||||
case IR::Attribute::WorkgroupIndex:
|
||||
return ctx.workgroup_index_id;
|
||||
case IR::Attribute::WorkgroupId:
|
||||
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
|
||||
case IR::Attribute::LocalInvocationId:
|
||||
|
@ -396,140 +402,158 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
|
|||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
template <u32 N, BufferAlias alias>
|
||||
static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
const auto& spv_buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(spv_buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
|
||||
}
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
|
||||
const auto [id, pointer_type] = spv_buffer[alias];
|
||||
if constexpr (N == 1) {
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(data_types[1], ptr);
|
||||
} else {
|
||||
boost::container::static_vector<Id, N> ids;
|
||||
for (u32 i = 0; i < N; i++) {
|
||||
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i)};
|
||||
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i)};
|
||||
ids.push_back(ctx.OpLoad(data_types[1], ptr));
|
||||
}
|
||||
return ctx.OpCompositeConstruct(buffer.data_types->Get(N), ids);
|
||||
return ctx.OpCompositeConstruct(data_types[N], ids);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
||||
const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
|
||||
const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
|
||||
const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(8u));
|
||||
const auto& spv_buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(spv_buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
|
||||
}
|
||||
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr));
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
||||
const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
|
||||
const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
|
||||
const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(16u));
|
||||
const auto& spv_buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(spv_buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
|
||||
}
|
||||
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr));
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
||||
return EmitLoadBufferU32xN<1>(ctx, handle, address);
|
||||
return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
||||
return EmitLoadBufferU32xN<2>(ctx, handle, address);
|
||||
return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
||||
return EmitLoadBufferU32xN<3>(ctx, handle, address);
|
||||
return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
||||
return EmitLoadBufferU32xN<4>(ctx, handle, address);
|
||||
return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return ctx.OpBitcast(ctx.F32[1], EmitLoadBufferU32(ctx, inst, handle, address));
|
||||
return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return ctx.OpBitcast(ctx.F32[2], EmitLoadBufferU32x2(ctx, inst, handle, address));
|
||||
return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return ctx.OpBitcast(ctx.F32[3], EmitLoadBufferU32x3(ctx, inst, handle, address));
|
||||
return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return ctx.OpBitcast(ctx.F32[4], EmitLoadBufferU32x4(ctx, inst, handle, address));
|
||||
return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
UNREACHABLE_MSG("SPIR-V instruction");
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
template <u32 N, BufferAlias alias>
|
||||
static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
const auto& spv_buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(spv_buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
|
||||
}
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
|
||||
const auto [id, pointer_type] = spv_buffer[alias];
|
||||
if constexpr (N == 1) {
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
||||
ctx.OpStore(ptr, value);
|
||||
} else {
|
||||
for (u32 i = 0; i < N; i++) {
|
||||
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
|
||||
const Id ptr =
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i);
|
||||
ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i));
|
||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
|
||||
ctx.OpStore(ptr, ctx.OpCompositeExtract(data_types[1], value, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
|
||||
const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
|
||||
const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
|
||||
const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
|
||||
const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(8u))};
|
||||
EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
|
||||
const auto& spv_buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(spv_buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
|
||||
}
|
||||
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
|
||||
ctx.OpStore(ptr, ctx.OpUConvert(ctx.U8, value));
|
||||
}
|
||||
|
||||
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
|
||||
const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
|
||||
const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
|
||||
const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
|
||||
const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(16u))};
|
||||
EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
|
||||
const auto& spv_buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(spv_buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
|
||||
}
|
||||
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
||||
ctx.OpStore(ptr, ctx.OpUConvert(ctx.U16, value));
|
||||
}
|
||||
|
||||
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32xN<1>(ctx, handle, address, value);
|
||||
EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32xN<2>(ctx, handle, address, value);
|
||||
EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32xN<3>(ctx, handle, address, value);
|
||||
EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32xN<4>(ctx, handle, address, value);
|
||||
EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[1], value));
|
||||
EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32x2(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[2], value));
|
||||
EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32x3(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[3], value));
|
||||
EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferU32x4(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[4], value));
|
||||
EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
|
|
|
@ -9,65 +9,35 @@ namespace Shader::Backend::SPIRV {
|
|||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
if (ctx.info.has_emulated_shared_memory) {
|
||||
const Id pointer =
|
||||
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
} else {
|
||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
|
||||
if (ctx.info.has_emulated_shared_memory) {
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
|
||||
ctx.u32_zero_value, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
|
||||
ctx.u32_zero_value, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
} else {
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
if (ctx.info.has_emulated_shared_memory) {
|
||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
|
||||
ctx.u32_zero_value, word_offset);
|
||||
ctx.OpStore(pointer, value);
|
||||
} else {
|
||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
|
||||
if (ctx.info.has_emulated_shared_memory) {
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
|
||||
ctx.u32_zero_value, word_offset)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
|
||||
ctx.u32_zero_value, next_offset)};
|
||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
} else {
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
}
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -11,6 +11,9 @@ void EmitPrologue(EmitContext& ctx) {
|
|||
if (ctx.stage == Stage::Fragment) {
|
||||
ctx.DefineInterpolatedAttribs();
|
||||
}
|
||||
if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
|
||||
ctx.DefineWorkgroupIndex();
|
||||
}
|
||||
ctx.DefineBufferOffsets();
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "common/div_ceil.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/ir/passes/srt.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
|
@ -107,6 +106,8 @@ Id EmitContext::Def(const IR::Value& value) {
|
|||
void EmitContext::DefineArithmeticTypes() {
|
||||
void_id = Name(TypeVoid(), "void_id");
|
||||
U1[1] = Name(TypeBool(), "bool_id");
|
||||
U8 = Name(TypeUInt(8), "u8_id");
|
||||
U16 = Name(TypeUInt(16), "u16_id");
|
||||
if (info.uses_fp16) {
|
||||
F16[1] = Name(TypeFloat(16), "f16_id");
|
||||
U16 = Name(TypeUInt(16), "u16_id");
|
||||
|
@ -193,6 +194,9 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
|
||||
void EmitContext::DefineBufferOffsets() {
|
||||
for (BufferDefinition& buffer : buffers) {
|
||||
if (buffer.buffer_type != BufferType::Guest) {
|
||||
continue;
|
||||
}
|
||||
const u32 binding = buffer.binding;
|
||||
const u32 half = PushData::BufOffsetIndex + (binding >> 4);
|
||||
const u32 comp = (binding & 0xf) >> 2;
|
||||
|
@ -211,8 +215,7 @@ void EmitContext::DefineInterpolatedAttribs() {
|
|||
if (!profile.needs_manual_interpolation) {
|
||||
return;
|
||||
}
|
||||
// Iterate all input attributes, load them and manually interpolate with barycentric
|
||||
// coordinates.
|
||||
// Iterate all input attributes, load them and manually interpolate.
|
||||
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
||||
const auto& input = runtime_info.fs_info.inputs[i];
|
||||
const u32 semantic = input.param_index;
|
||||
|
@ -237,6 +240,20 @@ void EmitContext::DefineInterpolatedAttribs() {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineWorkgroupIndex() {
|
||||
const Id workgroup_id_val{OpLoad(U32[3], workgroup_id)};
|
||||
const Id workgroup_x{OpCompositeExtract(U32[1], workgroup_id_val, 0)};
|
||||
const Id workgroup_y{OpCompositeExtract(U32[1], workgroup_id_val, 1)};
|
||||
const Id workgroup_z{OpCompositeExtract(U32[1], workgroup_id_val, 2)};
|
||||
const Id num_workgroups{OpLoad(U32[3], num_workgroups_id)};
|
||||
const Id num_workgroups_x{OpCompositeExtract(U32[1], num_workgroups, 0)};
|
||||
const Id num_workgroups_y{OpCompositeExtract(U32[1], num_workgroups, 1)};
|
||||
workgroup_index_id =
|
||||
OpIAdd(U32[1], OpIAdd(U32[1], workgroup_x, OpIMul(U32[1], workgroup_y, num_workgroups_x)),
|
||||
OpIMul(U32[1], workgroup_z, OpIMul(U32[1], num_workgroups_x, num_workgroups_y)));
|
||||
Name(workgroup_index_id, "workgroup_index");
|
||||
}
|
||||
|
||||
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||
switch (default_value) {
|
||||
case 0:
|
||||
|
@ -305,9 +322,16 @@ void EmitContext::DefineInputs() {
|
|||
break;
|
||||
}
|
||||
case LogicalStage::Fragment:
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
if (info.loads.GetAny(IR::Attribute::FragCoord)) {
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
}
|
||||
if (info.stores.Get(IR::Attribute::Depth)) {
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
}
|
||||
if (info.loads.Get(IR::Attribute::IsFrontFace)) {
|
||||
front_facing =
|
||||
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
}
|
||||
if (profile.needs_manual_interpolation) {
|
||||
gl_bary_coord_id =
|
||||
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
|
||||
|
@ -342,9 +366,19 @@ void EmitContext::DefineInputs() {
|
|||
}
|
||||
break;
|
||||
case LogicalStage::Compute:
|
||||
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
||||
local_invocation_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||
if (info.loads.GetAny(IR::Attribute::WorkgroupIndex) ||
|
||||
info.loads.GetAny(IR::Attribute::WorkgroupId)) {
|
||||
workgroup_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
||||
}
|
||||
if (info.loads.GetAny(IR::Attribute::WorkgroupIndex)) {
|
||||
num_workgroups_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::NumWorkgroups, spv::StorageClass::Input);
|
||||
}
|
||||
if (info.loads.GetAny(IR::Attribute::LocalInvocationId)) {
|
||||
local_invocation_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||
}
|
||||
break;
|
||||
case LogicalStage::Geometry: {
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
|
@ -588,78 +622,74 @@ void EmitContext::DefinePushDataBlock() {
|
|||
interfaces.push_back(push_data_block);
|
||||
}
|
||||
|
||||
void EmitContext::DefineBuffers() {
|
||||
boost::container::small_vector<Id, 8> type_ids;
|
||||
const auto define_struct = [&](Id record_array_type, bool is_instance_data,
|
||||
std::optional<std::string_view> explicit_name = {}) {
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
|
||||
return struct_type;
|
||||
}
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
|
||||
: fmt::format("{}_cbuf_block_f32", stage);
|
||||
name = explicit_name.value_or(name);
|
||||
Name(struct_type, name);
|
||||
EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_written, u32 elem_shift,
|
||||
BufferType buffer_type, Id data_type) {
|
||||
// Define array type.
|
||||
const Id max_num_items = ConstU32(u32(profile.max_ubo_size) >> elem_shift);
|
||||
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
|
||||
: TypeArray(data_type, max_num_items)};
|
||||
// Define block struct type. Don't perform decorations twice on the same Id.
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
if (std::ranges::find(buf_type_ids, record_array_type.value, &Id::value) ==
|
||||
buf_type_ids.end()) {
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 1 << elem_shift);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
type_ids.push_back(record_array_type);
|
||||
return struct_type;
|
||||
};
|
||||
|
||||
if (info.has_readconst) {
|
||||
const Id data_type = U32[1];
|
||||
const auto storage_class = spv::StorageClass::Uniform;
|
||||
const Id pointer_type = TypePointer(storage_class, data_type);
|
||||
const Id record_array_type{
|
||||
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
|
||||
|
||||
const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
|
||||
|
||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding.unified++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, "srt_flatbuf_ubo");
|
||||
|
||||
srt_flatbuf = {
|
||||
.id = id,
|
||||
.binding = binding.buffer++,
|
||||
.pointer_type = pointer_type,
|
||||
};
|
||||
interfaces.push_back(id);
|
||||
buf_type_ids.push_back(record_array_type);
|
||||
}
|
||||
// Define buffer binding interface.
|
||||
const auto storage_class =
|
||||
is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||
const Id pointer_type = TypePointer(storage_class, data_type);
|
||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding.unified);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
if (is_storage && !is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
}
|
||||
switch (buffer_type) {
|
||||
case Shader::BufferType::GdsBuffer:
|
||||
Name(id, "gds_buffer");
|
||||
break;
|
||||
case Shader::BufferType::ReadConstUbo:
|
||||
Name(id, "srt_flatbuf_ubo");
|
||||
break;
|
||||
case Shader::BufferType::SharedMemory:
|
||||
Name(id, "ssbo_shmem");
|
||||
break;
|
||||
default:
|
||||
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer));
|
||||
}
|
||||
interfaces.push_back(id);
|
||||
return {id, pointer_type};
|
||||
};
|
||||
|
||||
void EmitContext::DefineBuffers() {
|
||||
for (const auto& desc : info.buffers) {
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const bool is_storage = desc.IsStorage(sharp, profile);
|
||||
const u32 array_size = profile.max_ubo_size >> 2;
|
||||
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
|
||||
: TypeArray(data_type, ConstU32(array_size))};
|
||||
const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
|
||||
const auto buf_sharp = desc.GetSharp(info);
|
||||
const bool is_storage = desc.IsStorage(buf_sharp, profile);
|
||||
|
||||
const auto storage_class =
|
||||
is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||
const Id pointer_type = TypePointer(storage_class, data_type);
|
||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding.unified++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
if (is_storage && !desc.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
// Define aliases depending on the shader usage.
|
||||
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
|
||||
if (True(desc.used_types & IR::Type::U32)) {
|
||||
spv_buffer[BufferAlias::U32] =
|
||||
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
|
||||
}
|
||||
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sharp_idx));
|
||||
|
||||
buffers.push_back({
|
||||
.id = id,
|
||||
.binding = binding.buffer++,
|
||||
.data_types = data_types,
|
||||
.pointer_type = pointer_type,
|
||||
});
|
||||
interfaces.push_back(id);
|
||||
if (True(desc.used_types & IR::Type::F32)) {
|
||||
spv_buffer[BufferAlias::F32] =
|
||||
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
|
||||
}
|
||||
if (True(desc.used_types & IR::Type::U16)) {
|
||||
spv_buffer[BufferAlias::U16] =
|
||||
DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
|
||||
}
|
||||
if (True(desc.used_types & IR::Type::U8)) {
|
||||
spv_buffer[BufferAlias::U8] =
|
||||
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
|
||||
}
|
||||
++binding.unified;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -809,51 +839,18 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory() {
|
||||
static constexpr size_t DefaultSharedMemSize = 2_KB;
|
||||
if (!info.uses_shared) {
|
||||
return;
|
||||
}
|
||||
ASSERT(info.stage == Stage::Compute);
|
||||
|
||||
const u32 max_shared_memory_size = profile.max_shared_memory_size;
|
||||
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||
if (shared_memory_size == 0) {
|
||||
shared_memory_size = DefaultSharedMemSize;
|
||||
}
|
||||
|
||||
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
||||
|
||||
if (shared_memory_size <= max_shared_memory_size) {
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||
Name(shared_memory_u32, "shared_mem");
|
||||
interfaces.push_back(shared_memory_u32);
|
||||
} else {
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
|
||||
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
||||
|
||||
Decorate(type, spv::Decoration::ArrayStride, 4);
|
||||
|
||||
const Id struct_type{TypeStruct(type)};
|
||||
Name(struct_type, "shared_memory_buf");
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
|
||||
const Id struct_pointer_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
|
||||
const Id ssbo_id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::StorageBuffer)};
|
||||
Decorate(ssbo_id, spv::Decoration::Binding, binding.unified++);
|
||||
Decorate(ssbo_id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(ssbo_id, "shared_mem_ssbo");
|
||||
|
||||
shared_memory_u32 = ssbo_id;
|
||||
|
||||
info.has_emulated_shared_memory = true;
|
||||
info.shared_memory_size = shared_memory_size;
|
||||
interfaces.push_back(ssbo_id);
|
||||
}
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||
Name(shared_memory_u32, "shared_mem");
|
||||
interfaces.push_back(shared_memory_u32);
|
||||
}
|
||||
|
||||
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
#include "shader_recompiler/backend/bindings.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
@ -45,6 +45,7 @@ public:
|
|||
|
||||
void DefineBufferOffsets();
|
||||
void DefineInterpolatedAttribs();
|
||||
void DefineWorkgroupIndex();
|
||||
|
||||
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
|
||||
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||
|
@ -200,8 +201,10 @@ public:
|
|||
std::array<Id, 30> patches{};
|
||||
|
||||
Id workgroup_id{};
|
||||
Id num_workgroups_id{};
|
||||
Id workgroup_index_id{};
|
||||
Id local_invocation_id{};
|
||||
Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
|
||||
Id invocation_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
|
||||
|
@ -227,18 +230,38 @@ public:
|
|||
bool is_storage = false;
|
||||
};
|
||||
|
||||
struct BufferDefinition {
|
||||
enum class BufferAlias : u32 {
|
||||
U8,
|
||||
U16,
|
||||
U32,
|
||||
F32,
|
||||
NumAlias,
|
||||
};
|
||||
|
||||
struct BufferSpv {
|
||||
Id id;
|
||||
Id offset;
|
||||
Id offset_dwords;
|
||||
u32 binding;
|
||||
const VectorIds* data_types;
|
||||
Id pointer_type;
|
||||
};
|
||||
|
||||
struct BufferDefinition {
|
||||
u32 binding;
|
||||
BufferType buffer_type;
|
||||
Id offset;
|
||||
Id offset_dwords;
|
||||
std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
|
||||
|
||||
const BufferSpv& operator[](BufferAlias alias) const {
|
||||
return aliases[u32(alias)];
|
||||
}
|
||||
|
||||
BufferSpv& operator[](BufferAlias alias) {
|
||||
return aliases[u32(alias)];
|
||||
}
|
||||
};
|
||||
|
||||
Bindings& binding;
|
||||
boost::container::small_vector<Id, 16> buf_type_ids;
|
||||
boost::container::small_vector<BufferDefinition, 16> buffers;
|
||||
BufferDefinition srt_flatbuf;
|
||||
boost::container::small_vector<TextureDefinition, 8> images;
|
||||
boost::container::small_vector<Id, 4> samplers;
|
||||
|
||||
|
@ -279,6 +302,9 @@ private:
|
|||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
|
||||
bool output);
|
||||
|
||||
BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type,
|
||||
Id data_type);
|
||||
|
||||
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
|
||||
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue