spirv: Simplify shared memory handling (#427)

* spirv: Simplify shared memory handling

* spirv: Ignore clip plane

* spirv: Fix image offsets

* ir_pass: Implement shared memory lowering pass

* NVIDIA doesn't like using shared mem in fragment shader and softlocks driver

* spirv: Add log for ignoring pos1
This commit is contained in:
TheTurtle 2024-08-14 19:01:17 +03:00 committed by GitHub
parent 4b11dabd9e
commit d332a5e611
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 182 additions and 274 deletions

View file

@ -214,6 +214,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Position1) {
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
}

View file

@ -17,113 +17,133 @@ struct ImageOperands {
operands.push_back(value);
}
void AddOffset(EmitContext& ctx, const IR::Value& offset,
bool can_use_runtime_offsets = false) {
if (offset.IsEmpty()) {
return;
}
if (offset.IsImmediate()) {
const s32 operand = offset.U32();
Add(spv::ImageOperandsMask::ConstOffset, ctx.ConstS32(operand));
return;
}
IR::Inst* const inst{offset.InstRecursive()};
if (inst->AreAllArgsImmediates()) {
switch (inst->GetOpcode()) {
case IR::Opcode::CompositeConstructU32x2:
Add(spv::ImageOperandsMask::ConstOffset,
ctx.ConstS32(static_cast<s32>(inst->Arg(0).U32()),
static_cast<s32>(inst->Arg(1).U32())));
return;
case IR::Opcode::CompositeConstructU32x3:
Add(spv::ImageOperandsMask::ConstOffset,
ctx.ConstS32(static_cast<s32>(inst->Arg(0).U32()),
static_cast<s32>(inst->Arg(1).U32()),
static_cast<s32>(inst->Arg(2).U32())));
return;
default:
break;
}
}
if (can_use_runtime_offsets) {
Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
} else {
LOG_WARNING(Render_Vulkan,
"Runtime offset provided to unsupported image sample instruction");
}
}
spv::ImageOperandsMask mask{};
boost::container::static_vector<Id, 4> operands;
};
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
Id offset) {
const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(bias)) {
operands.Add(spv::ImageOperandsMask::Bias, bias);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.Add(spv::ImageOperandsMask::Bias, bias);
operands.AddOffset(ctx, offset);
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset) {
const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(lod)) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.Add(spv::ImageOperandsMask::Lod, lod);
operands.AddOffset(ctx, offset);
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias, Id offset) {
Id bias, const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(bias)) {
operands.Add(spv::ImageOperandsMask::Bias, bias);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.Add(spv::ImageOperandsMask::Bias, bias);
operands.AddOffset(ctx, offset);
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id lod, Id offset) {
Id lod, const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(lod)) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod);
return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) {
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.AddOffset(ctx, offset);
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
operands.operands);
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
Id offset2, Id dref) {
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, Id dref) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.AddOffset(ctx, offset);
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms) {
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
Id lod, Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id result_type = texture.data_types->Get(4);
if (Sirit::ValidId(lod)) {
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords,
spv::ImageOperandsMask::Lod, lod));
} else {
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords));
}
ImageOperands operands;
operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod);
return ctx.OpBitcast(
ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands));
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {

View file

@ -93,15 +93,9 @@ Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx);
Id EmitUndefU32(EmitContext& ctx);
Id EmitUndefU64(EmitContext& ctx);
Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
@ -358,18 +352,19 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitConvertU32U16(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
Id offset);
const IR::Value& offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset);
const IR::Value& offset);
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias, Id offset);
Id bias, const IR::Value& offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id lod, Id offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
Id offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms);
Id lod, const IR::Value& offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
Id lod, Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,

View file

@ -5,99 +5,25 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
const Id shift_id{ctx.ConstU32(shift)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
}
Id Word(EmitContext& ctx, Id offset) {
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
return ctx.OpLoad(ctx.U32[1], pointer);
}
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
const Id count_id{ctx.ConstU32(count)};
return {bit, count_id};
}
} // Anonymous namespace
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
return ctx.OpLoad(ctx.U32[1], pointer);
} else {
return Word(ctx, offset);
}
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
return ctx.OpLoad(ctx.U32[2], pointer);
} else {
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
return ctx.OpLoad(ctx.U32[4], pointer);
}
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
std::array<Id, 4> values{};
@ -109,35 +35,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
return ctx.OpCompositeConstruct(ctx.U32[4], values);
}
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
}
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
}
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
Id pointer{};
if (ctx.profile.support_explicit_workgroup_layout) {
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
} else {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
}
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
ctx.OpStore(pointer, value);
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
@ -148,11 +53,6 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
}
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
for (u32 i = 0; i < 4; ++i) {

View file

@ -513,43 +513,9 @@ void EmitContext::DefineSharedMemory() {
if (info.shared_memory_size == 0) {
info.shared_memory_size = DefaultSharedMemSize;
}
const auto make{[&](Id element_type, u32 element_size) {
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
const Id struct_type{TypeStruct(array_type)};
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
Decorate(struct_type, spv::Decoration::Block);
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
Decorate(variable, spv::Decoration::Aliased);
interfaces.push_back(variable);
return std::make_tuple(variable, element_pointer, pointer);
}};
if (profile.support_explicit_workgroup_layout) {
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
if (info.uses_shared_u8) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
}
if (info.uses_shared_u16) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
}
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
return;
}
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
interfaces.push_back(shared_memory_u32);