renderer_vulkan: use LDS buffer as SSBO on unsupported shared memory size (#2245)

* renderer_vulkan: use LDS buffer as SSBO on unsupported shared memory size

* shader_recompiler: add `v_trunc_f64` on inst format table
This commit is contained in:
poly 2025-01-31 12:52:31 +01:00 committed by GitHub
parent 8aea0fc7ee
commit eed4de1da9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 147 additions and 36 deletions

View file

@ -9,18 +9,33 @@ namespace Shader::Backend::SPIRV {
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
return ctx.OpLoad(ctx.U32[1], pointer);
if (ctx.info.has_emulated_shared_memory) {
const Id pointer =
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U32[1], pointer);
} else {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
return ctx.OpLoad(ctx.U32[1], pointer);
}
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
if (ctx.info.has_emulated_shared_memory) {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
} else {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
}
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
@ -29,8 +44,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
std::array<Id, 4> values{};
for (u32 i = 0; i < 4; ++i) {
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
if (ctx.info.has_emulated_shared_memory) {
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, index)};
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
} else {
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
}
}
return ctx.OpCompositeConstruct(ctx.U32[4], values);
}
@ -38,18 +59,33 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
ctx.OpStore(pointer, value);
if (ctx.info.has_emulated_shared_memory) {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, word_offset);
ctx.OpStore(pointer, value);
} else {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
ctx.OpStore(pointer, value);
}
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
if (ctx.info.has_emulated_shared_memory) {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
} else {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
}
}
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
@ -57,8 +93,14 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
for (u32 i = 0; i < 4; ++i) {
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
if (ctx.info.has_emulated_shared_memory) {
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, index)};
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
} else {
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
}
}
}

View file

@ -65,17 +65,17 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
const Info& info_, Bindings& binding_)
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, Info& info_,
Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
DefineSharedMemory();
DefineBuffers();
DefineTextureBuffers();
DefineImagesAndSamplers();
DefineSharedMemory();
}
EmitContext::~EmitContext() = default;
@ -852,20 +852,45 @@ void EmitContext::DefineSharedMemory() {
if (!info.uses_shared) {
return;
}
const u32 max_shared_memory_size = profile.max_shared_memory_size;
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size == 0) {
shared_memory_size = DefaultSharedMemSize;
}
const u32 max_shared_memory_size = runtime_info.cs_info.max_shared_memory_size;
ASSERT(shared_memory_size <= max_shared_memory_size);
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
interfaces.push_back(shared_memory_u32);
if (shared_memory_size <= max_shared_memory_size) {
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
Name(shared_memory_u32, "shared_mem");
interfaces.push_back(shared_memory_u32);
} else {
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
Decorate(type, spv::Decoration::ArrayStride, 4);
const Id struct_type{TypeStruct(type)};
Name(struct_type, "shared_memory_buf");
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
const Id struct_pointer_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
const Id ssbo_id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::StorageBuffer)};
Decorate(ssbo_id, spv::Decoration::Binding, binding.unified++);
Decorate(ssbo_id, spv::Decoration::DescriptorSet, 0U);
Name(ssbo_id, "shared_mem_ssbo");
shared_memory_u32 = ssbo_id;
info.has_emulated_shared_memory = true;
info.shared_memory_size = shared_memory_size;
interfaces.push_back(ssbo_id);
}
}
} // namespace Shader::Backend::SPIRV

View file

@ -37,7 +37,7 @@ struct VectorIds {
class EmitContext final : public Sirit::Module {
public:
explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info,
explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, Info& info,
Bindings& binding);
~EmitContext();
@ -132,7 +132,7 @@ public:
return ConstantComposite(type, constituents);
}
const Info& info;
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage;