shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available
This commit is contained in:
parent
84298ce191
commit
e860870dd2
20 changed files with 730 additions and 36 deletions
|
@ -9,6 +9,7 @@
|
|||
#include <fmt/format.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
|
|||
}
|
||||
|
||||
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
|
||||
: Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} {
|
||||
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineCommonTypes(program.info);
|
||||
DefineCommonConstants();
|
||||
DefineInterfaces(program.info);
|
||||
DefineLocalMemory(program);
|
||||
DefineSharedMemory(program);
|
||||
DefineConstantBuffers(program.info, binding);
|
||||
DefineStorageBuffers(program.info, binding);
|
||||
DefineTextures(program.info, binding);
|
||||
|
@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
|||
F32.Define(*this, TypeFloat(32), "f32");
|
||||
U32.Define(*this, TypeInt(32, false), "u32");
|
||||
|
||||
private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
|
||||
|
||||
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
|
||||
input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
|
||||
input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
|
||||
|
@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) {
|
|||
DefineOutputs(info);
|
||||
}
|
||||
|
||||
void EmitContext::DefineLocalMemory(const IR::Program& program) {
|
||||
if (program.local_memory_size == 0) {
|
||||
return;
|
||||
}
|
||||
const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
|
||||
const Id pointer{TypePointer(spv::StorageClass::Private, type)};
|
||||
local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
|
||||
if (profile.supported_spirv >= 0x00010400) {
|
||||
interfaces.push_back(local_memory);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory(const IR::Program& program) {
|
||||
if (program.shared_memory_size == 0) {
|
||||
return;
|
||||
}
|
||||
const auto make{[&](Id element_type, u32 element_size) {
|
||||
const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
|
||||
const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))};
|
||||
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
|
||||
|
||||
const Id struct_type{TypeStruct(array_type)};
|
||||
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
|
||||
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
|
||||
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
|
||||
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
|
||||
Decorate(variable, spv::Decoration::Aliased);
|
||||
interfaces.push_back(variable);
|
||||
|
||||
return std::make_pair(variable, element_pointer);
|
||||
}};
|
||||
if (profile.support_explicit_workgroup_layout) {
|
||||
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
|
||||
if (program.info.uses_int8) {
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
|
||||
std::tie(shared_memory_u8, shared_u8) = make(U8, 1);
|
||||
}
|
||||
if (program.info.uses_int16) {
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
|
||||
std::tie(shared_memory_u16, shared_u16) = make(U16, 2);
|
||||
}
|
||||
std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4);
|
||||
std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8);
|
||||
std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16);
|
||||
}
|
||||
const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)};
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup);
|
||||
interfaces.push_back(shared_memory_u32);
|
||||
|
||||
const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
|
||||
const auto make_function{[&](u32 mask, u32 size) {
|
||||
const Id loop_header{OpLabel()};
|
||||
const Id continue_block{OpLabel()};
|
||||
const Id merge_block{OpLabel()};
|
||||
|
||||
const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
|
||||
const Id offset{OpFunctionParameter(U32[1])};
|
||||
const Id insert_value{OpFunctionParameter(U32[1])};
|
||||
AddLabel();
|
||||
OpBranch(loop_header);
|
||||
|
||||
AddLabel(loop_header);
|
||||
const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))};
|
||||
const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))};
|
||||
const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))};
|
||||
const Id count{Constant(U32[1], size)};
|
||||
OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
|
||||
OpBranch(continue_block);
|
||||
|
||||
AddLabel(continue_block);
|
||||
const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
|
||||
const Id old_value{OpLoad(U32[1], word_pointer)};
|
||||
const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
|
||||
const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U),
|
||||
u32_zero_value, u32_zero_value, new_value,
|
||||
old_value)};
|
||||
const Id success{OpIEqual(U1, atomic_res, old_value)};
|
||||
OpBranchConditional(success, merge_block, loop_header);
|
||||
|
||||
AddLabel(merge_block);
|
||||
OpReturn();
|
||||
OpFunctionEnd();
|
||||
return func;
|
||||
}};
|
||||
if (program.info.uses_int8) {
|
||||
shared_store_u8_func = make_function(24, 8);
|
||||
}
|
||||
if (program.info.uses_int16) {
|
||||
shared_store_u16_func = make_function(16, 16);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
|
||||
if (info.constant_buffer_descriptors.empty()) {
|
||||
return;
|
||||
|
@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
|
|||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("ssbo{}", index));
|
||||
if (profile.supported_spirv >= 0x00010400) {
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
std::fill_n(ssbos.data() + index, desc.count, id);
|
||||
index += desc.count;
|
||||
binding += desc.count;
|
||||
|
@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
|
|||
.image_type{image_type},
|
||||
});
|
||||
}
|
||||
if (profile.supported_spirv >= 0x00010400) {
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
binding += desc.count;
|
||||
}
|
||||
}
|
||||
|
@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions:
|
|||
for (size_t i = 0; i < desc.count; ++i) {
|
||||
cbufs[desc.index + i].*member_type = id;
|
||||
}
|
||||
if (profile.supported_spirv >= 0x00010400) {
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
binding += desc.count;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,6 +73,14 @@ public:
|
|||
|
||||
UniformDefinitions uniform_types;
|
||||
|
||||
Id private_u32{};
|
||||
|
||||
Id shared_u8{};
|
||||
Id shared_u16{};
|
||||
Id shared_u32{};
|
||||
Id shared_u32x2{};
|
||||
Id shared_u32x4{};
|
||||
|
||||
Id input_f32{};
|
||||
Id input_u32{};
|
||||
Id input_s32{};
|
||||
|
@ -96,6 +104,17 @@ public:
|
|||
Id base_vertex{};
|
||||
Id front_face{};
|
||||
|
||||
Id local_memory{};
|
||||
|
||||
Id shared_memory_u8{};
|
||||
Id shared_memory_u16{};
|
||||
Id shared_memory_u32{};
|
||||
Id shared_memory_u32x2{};
|
||||
Id shared_memory_u32x4{};
|
||||
|
||||
Id shared_store_u8_func{};
|
||||
Id shared_store_u16_func{};
|
||||
|
||||
Id input_position{};
|
||||
std::array<Id, 32> input_generics{};
|
||||
|
||||
|
@ -111,6 +130,8 @@ private:
|
|||
void DefineCommonTypes(const Info& info);
|
||||
void DefineCommonConstants();
|
||||
void DefineInterfaces(const Info& info);
|
||||
void DefineLocalMemory(const IR::Program& program);
|
||||
void DefineSharedMemory(const IR::Program& program);
|
||||
void DefineConstantBuffers(const Info& info, u32& binding);
|
||||
void DefineStorageBuffers(const Info& info, u32& binding);
|
||||
void DefineTextures(const Info& info, u32& binding);
|
||||
|
|
|
@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx);
|
|||
void EmitSetOFlag(EmitContext& ctx);
|
||||
Id EmitWorkgroupId(EmitContext& ctx);
|
||||
Id EmitLocalInvocationId(EmitContext& ctx);
|
||||
Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
|
||||
void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
|
||||
Id EmitUndefU1(EmitContext& ctx);
|
||||
Id EmitUndefU8(EmitContext& ctx);
|
||||
Id EmitUndefU16(EmitContext& ctx);
|
||||
|
@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
|
|||
Id value);
|
||||
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
|
||||
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
||||
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
||||
|
|
|
@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
|
|||
return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
|
||||
}
|
||||
|
||||
Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
|
||||
const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
|
||||
void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
|
||||
const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
175
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
Normal file
175
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
Normal file
|
@ -0,0 +1,175 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
|
||||
}
|
||||
|
||||
Id Word(EmitContext& ctx, Id offset) {
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
|
||||
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
|
||||
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
|
||||
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
|
||||
const Id count_id{ctx.Constant(ctx.U32[1], count)};
|
||||
return {bit, count_id};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
} else {
|
||||
return Word(ctx, offset);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
return ctx.OpLoad(ctx.U32[2], pointer);
|
||||
} else {
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
return ctx.OpLoad(ctx.U32[4], pointer);
|
||||
}
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
std::array<Id, 4> values{};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
const Id index{i == 0 ? base_index
|
||||
: ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], values);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
|
||||
} else {
|
||||
ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
|
||||
} else {
|
||||
ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||
Id pointer{};
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
|
||||
} else {
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
}
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
const Id index{i == 0 ? base_index
|
||||
: ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
Loading…
Add table
Add a link
Reference in a new issue