video_core: Account of runtime state changes when compiling shaders (#575)

* video_core: Compile shader permutations

* spirv: Only specific storage image format for atomics

* ir: Avoid cube coord patching for storage image

* spirv: Fix default attributes

* data_share: Add more instructions

* video_core: Query storage flag with runtime state

* kernel: Use std::list for semaphore

* video_core: Use texture buffers for untyped format load/store

* buffer_cache: Limit view usage

* vk_pipeline_cache: Fix invalid iterator

* image_view: Reduce log spam when alpha=1 in storage swizzle

* video_core: More features and proper spirv feature detection

* video_core: Attempt no2 for specialization

* spirv: Remove conflict

* vk_shader_cache: Small cleanup
This commit is contained in:
TheTurtle 2024-08-29 19:29:54 +03:00 committed by GitHub
parent 790d19e59b
commit 66e96dd944
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 1058 additions and 976 deletions

View file

@ -41,13 +41,14 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding_)
: Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
stage{program.info.stage}, binding{binding_} {
EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage},
binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
DefineBuffers();
DefineTextureBuffers();
DefineImagesAndSamplers();
DefineSharedMemory();
}
@ -123,25 +124,24 @@ void EmitContext::DefineInterfaces() {
DefineOutputs();
}
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
case AmdGpu::NumberFormat::Uint:
return ctx.U32[4];
case AmdGpu::NumberFormat::Sscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Uscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Srgb:
return ctx.F32;
case AmdGpu::NumberFormat::Sint:
return ctx.S32;
case AmdGpu::NumberFormat::Uint:
return ctx.U32;
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
}
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
@ -162,7 +162,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
}
void EmitContext::DefineBufferOffsets() {
@ -177,6 +177,16 @@ void EmitContext::DefineBufferOffsets() {
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
}
for (auto& tex_buffer : texture_buffers) {
const u32 binding = tex_buffer.binding;
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
}
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
@ -195,6 +205,11 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
}
void EmitContext::DefineInputs() {
if (info.uses_lane_id) {
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
switch (stage) {
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
@ -202,7 +217,7 @@ void EmitContext::DefineInputs() {
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id type{GetAttributeType(*this, input.fmt)[4]};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
@ -229,15 +244,12 @@ void EmitContext::DefineInputs() {
break;
}
case Stage::Fragment:
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
for (const auto& input : info.ps_inputs) {
const u32 semantic = input.param_index;
if (input.is_default) {
if (input.is_default && !input.is_flat) {
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
F32[1], 4, true};
continue;
@ -328,47 +340,74 @@ void EmitContext::DefinePushDataBlock() {
void EmitContext::DefineBuffers() {
boost::container::small_vector<Id, 8> type_ids;
for (u32 i = 0; const auto& buffer : info.buffers) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
const Id data_type = (*data_types)[1];
const Id record_array_type{buffer.is_storage
? TypeRuntimeArray(data_type)
: TypeArray(data_type, ConstU32(buffer.length))};
const auto define_struct = [&](Id record_array_type, bool is_instance_data) {
const Id struct_type{TypeStruct(record_array_type)};
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name =
buffer.is_instance_data
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
sizeof(float) * CHAR_BIT)
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
type_ids.push_back(record_array_type);
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
return struct_type;
}
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
: fmt::format("{}_cbuf_block_f32", stage);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
type_ids.push_back(record_array_type);
return struct_type;
};
for (const auto& desc : info.buffers) {
const auto sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(sharp);
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
const Id data_type = (*data_types)[1];
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
: TypeArray(data_type, ConstU32(sharp.NumDwords()))};
const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
const auto storage_class =
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
const Id pointer_type = TypePointer(storage_class, data_type);
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}", buffer.is_storage ? "ssbo" : "cbuf", buffer.sgpr_base));
if (is_storage && !desc.is_written) {
Decorate(id, spv::Decoration::NonWritable);
}
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
buffers.push_back({
.id = id,
.binding = binding++,
.data_types = data_types,
.pointer_type = pointer_type,
.dfmt = buffer.dfmt,
.nfmt = buffer.nfmt,
.stride = buffer.GetVsharp(info).GetStride(),
});
interfaces.push_back(id);
i++;
}
}
void EmitContext::DefineTextureBuffers() {
for (const auto& desc : info.texture_buffers) {
const bool is_integer =
desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
const u32 sampled = desc.is_written ? 2 : 1;
const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
sampled, spv::ImageFormat::Unknown)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
texture_buffers.push_back({
.id = id,
.binding = binding++,
.image_type = image_type,
.result_type = sampled_type[4],
.is_integer = is_integer,
});
interfaces.push_back(id);
}
}
@ -447,7 +486,7 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
const auto format = desc.is_storage ? GetFormat(image) : spv::ImageFormat::Unknown;
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
const u32 sampled = desc.is_storage ? 2 : 1;
switch (desc.type) {
case AmdGpu::ImageType::Color1D:
@ -470,17 +509,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
void EmitContext::DefineImagesAndSamplers() {
for (const auto& image_desc : info.images) {
const VectorIds* data_types = [&] {
switch (image_desc.nfmt) {
case AmdGpu::NumberFormat::Uint:
return &U32;
case AmdGpu::NumberFormat::Sint:
return &S32;
default:
return &F32;
}
}();
const Id sampled_type = data_types->Get(1);
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
@ -489,7 +519,7 @@ void EmitContext::DefineImagesAndSamplers() {
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
image_desc.dword_offset));
images.push_back({
.data_types = data_types,
.data_types = &data_types,
.id = id,
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type,
@ -498,13 +528,12 @@ void EmitContext::DefineImagesAndSamplers() {
interfaces.push_back(id);
++binding;
}
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
}
if (info.samplers.empty()) {
return;
}
sampler_type = TypeSampler();
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
for (const auto& samp_desc : info.samplers) {
@ -520,14 +549,15 @@ void EmitContext::DefineImagesAndSamplers() {
}
void EmitContext::DefineSharedMemory() {
static constexpr size_t DefaultSharedMemSize = 16_KB;
static constexpr size_t DefaultSharedMemSize = 2_KB;
if (!info.uses_shared) {
return;
}
if (info.shared_memory_size == 0) {
info.shared_memory_size = DefaultSharedMemSize;
u32 shared_memory_size = info.shared_memory_size;
if (shared_memory_size == 0) {
shared_memory_size = DefaultSharedMemSize;
}
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);