mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-21 02:45:00 +00:00
video_core: Account of runtime state changes when compiling shaders (#575)
* video_core: Compile shader permutations * spirv: Only specific storage image format for atomics * ir: Avoid cube coord patching for storage image * spirv: Fix default attributes * data_share: Add more instructions * video_core: Query storage flag with runtime state * kernel: Use std::list for semaphore * video_core: Use texture buffers for untyped format load/store * buffer_cache: Limit view usage * vk_pipeline_cache: Fix invalid iterator * image_view: Reduce log spam when alpha=1 in storage swizzle * video_core: More features and proper spirv feature detection * video_core: Attempt no2 for specialization * spirv: Remove conflict * vk_shader_cache: Small cleanup
This commit is contained in:
parent
790d19e59b
commit
66e96dd944
43 changed files with 1058 additions and 976 deletions
|
@ -99,7 +99,7 @@ Id TypeId(const EmitContext& ctx, IR::Type type) {
|
|||
}
|
||||
}
|
||||
|
||||
void Traverse(EmitContext& ctx, IR::Program& program) {
|
||||
void Traverse(EmitContext& ctx, const IR::Program& program) {
|
||||
IR::Block* current_block{};
|
||||
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
|
||||
switch (node.type) {
|
||||
|
@ -162,7 +162,7 @@ void Traverse(EmitContext& ctx, IR::Program& program) {
|
|||
}
|
||||
}
|
||||
|
||||
Id DefineMain(EmitContext& ctx, IR::Program& program) {
|
||||
Id DefineMain(EmitContext& ctx, const IR::Program& program) {
|
||||
const Id void_function{ctx.TypeFunction(ctx.void_id)};
|
||||
const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
|
@ -185,8 +185,27 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
ctx.AddCapability(spv::Capability::Int16);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::Int64);
|
||||
if (info.has_storage_images) {
|
||||
if (info.has_storage_images || info.has_image_buffers) {
|
||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
||||
}
|
||||
if (info.has_texel_buffers) {
|
||||
ctx.AddCapability(spv::Capability::SampledBuffer);
|
||||
}
|
||||
if (info.has_image_buffers) {
|
||||
ctx.AddCapability(spv::Capability::ImageBuffer);
|
||||
}
|
||||
if (info.has_image_gather) {
|
||||
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||
}
|
||||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
if (info.uses_lane_id) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
}
|
||||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
|
@ -206,19 +225,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
} else {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
if (info.has_discard) {
|
||||
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
||||
}
|
||||
if (info.has_image_gather) {
|
||||
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||
}
|
||||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
if (info.stores.Get(IR::Attribute::Depth)) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
|
@ -229,7 +238,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
||||
}
|
||||
|
||||
void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
|
||||
void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
|
||||
auto inst{program.blocks.front()->begin()};
|
||||
size_t block_index{0};
|
||||
ctx.PatchDeferredPhi([&](size_t phi_arg) {
|
||||
|
@ -248,8 +257,8 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) {
|
||||
EmitContext ctx{profile, program, binding};
|
||||
std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) {
|
||||
EmitContext ctx{profile, program.info, binding};
|
||||
const Id main{DefineMain(ctx, program)};
|
||||
DefineEntryPoint(program, ctx, main);
|
||||
if (program.info.stage == Stage::Vertex) {
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program,
|
||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program,
|
||||
u32& binding);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -262,171 +262,15 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
|||
return EmitLoadBufferF32xN<4>(ctx, handle, address);
|
||||
}
|
||||
|
||||
static bool IsSignedInteger(AmdGpu::NumberFormat format) {
|
||||
switch (format) {
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return false;
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return true;
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
static u32 UXBitsMax(u32 bit_width) {
|
||||
return (1u << bit_width) - 1u;
|
||||
}
|
||||
|
||||
static u32 SXBitsMax(u32 bit_width) {
|
||||
return (1u << (bit_width - 1u)) - 1u;
|
||||
}
|
||||
|
||||
static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, u32 bit_width) {
|
||||
switch (format) {
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))));
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width))));
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
// (x * 2 + 1) / (Format::SMAX * 2)
|
||||
value = ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(2.f));
|
||||
value = ctx.OpFAdd(ctx.F32[1], value, ctx.ConstF32(1.f));
|
||||
return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width) * 2)));
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
return value;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unsupported number format for conversion: {}",
|
||||
magic_enum::enum_name(format));
|
||||
}
|
||||
}
|
||||
|
||||
static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offset) {
|
||||
Id comp_offset = ctx.ConstU32(bit_offset);
|
||||
if (stride < 4) {
|
||||
// comp_offset += (address % 4) * 8;
|
||||
const Id byte_offset = ctx.OpUMod(ctx.U32[1], address, ctx.ConstU32(4u));
|
||||
const Id bit_offset = ctx.OpShiftLeftLogical(ctx.U32[1], byte_offset, ctx.ConstU32(3u));
|
||||
comp_offset = ctx.OpIAdd(ctx.U32[1], comp_offset, bit_offset);
|
||||
}
|
||||
return comp_offset;
|
||||
}
|
||||
|
||||
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
const auto format = buffer.dfmt;
|
||||
switch (format) {
|
||||
case AmdGpu::DataFormat::FormatInvalid:
|
||||
return ctx.f32_zero_value;
|
||||
case AmdGpu::DataFormat::Format8:
|
||||
case AmdGpu::DataFormat::Format16:
|
||||
case AmdGpu::DataFormat::Format32:
|
||||
case AmdGpu::DataFormat::Format8_8:
|
||||
case AmdGpu::DataFormat::Format16_16:
|
||||
case AmdGpu::DataFormat::Format10_11_11:
|
||||
case AmdGpu::DataFormat::Format11_11_10:
|
||||
case AmdGpu::DataFormat::Format10_10_10_2:
|
||||
case AmdGpu::DataFormat::Format2_10_10_10:
|
||||
case AmdGpu::DataFormat::Format8_8_8_8:
|
||||
case AmdGpu::DataFormat::Format32_32:
|
||||
case AmdGpu::DataFormat::Format16_16_16_16:
|
||||
case AmdGpu::DataFormat::Format32_32_32:
|
||||
case AmdGpu::DataFormat::Format32_32_32_32: {
|
||||
const u32 num_components = AmdGpu::NumComponents(format);
|
||||
if (comp >= num_components) {
|
||||
return ctx.f32_zero_value;
|
||||
}
|
||||
|
||||
// uint index = address / 4;
|
||||
Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const u32 stride = buffer.stride;
|
||||
if (stride > 4) {
|
||||
const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
|
||||
if (index_offset > 0) {
|
||||
// index += index_offset;
|
||||
index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(index_offset));
|
||||
}
|
||||
}
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
|
||||
|
||||
const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
|
||||
const u32 bit_width = AmdGpu::ComponentBits(format, comp);
|
||||
const auto num_format = buffer.nfmt;
|
||||
if (num_format == AmdGpu::NumberFormat::Float) {
|
||||
if (bit_width == 32) {
|
||||
return ctx.OpLoad(ctx.F32[1], ptr);
|
||||
} else if (bit_width == 16) {
|
||||
const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
|
||||
Id value = ctx.OpLoad(ctx.U32[1], ptr);
|
||||
value =
|
||||
ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width));
|
||||
value = ctx.OpSConvert(ctx.U16, value);
|
||||
value = ctx.OpBitcast(ctx.F16[1], value);
|
||||
return ctx.OpFConvert(ctx.F32[1], value);
|
||||
} else {
|
||||
UNREACHABLE_MSG("Invalid float bit width {}", bit_width);
|
||||
}
|
||||
} else {
|
||||
Id value = ctx.OpLoad(ctx.U32[1], ptr);
|
||||
const bool is_signed = IsSignedInteger(num_format);
|
||||
if (bit_width < 32) {
|
||||
const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
|
||||
if (is_signed) {
|
||||
value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
|
||||
ctx.ConstU32(bit_width));
|
||||
} else {
|
||||
value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
|
||||
ctx.ConstU32(bit_width));
|
||||
}
|
||||
}
|
||||
value = ctx.OpBitcast(ctx.F32[1], value);
|
||||
return ConvertValue(ctx, value, num_format, bit_width);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
|
||||
}
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
if constexpr (N == 1) {
|
||||
return GetBufferFormatValue(ctx, handle, address, 0);
|
||||
} else {
|
||||
boost::container::static_vector<Id, N> ids;
|
||||
for (u32 i = 0; i < N; i++) {
|
||||
ids.push_back(GetBufferFormatValue(ctx, handle, address, i));
|
||||
}
|
||||
return ctx.OpCompositeConstruct(ctx.F32[N], ids);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return EmitLoadBufferFormatF32xN<1>(ctx, inst, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return EmitLoadBufferFormatF32xN<2>(ctx, inst, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return EmitLoadBufferFormatF32xN<3>(ctx, inst, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
|
||||
const auto& buffer = ctx.texture_buffers[handle];
|
||||
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
|
||||
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
|
||||
Id texel = ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
|
||||
if (buffer.is_integer) {
|
||||
texel = ctx.OpBitcast(ctx.F32[4], texel);
|
||||
}
|
||||
return texel;
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
|
@ -467,97 +311,14 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
|
|||
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
|
||||
u32 bit_width) {
|
||||
switch (format) {
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
return ctx.OpConvertFToU(
|
||||
ctx.U32[1], ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width)))));
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return ctx.OpBitcast(ctx.U32[1], value);
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
return value;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unsupported number format for conversion: {}",
|
||||
magic_enum::enum_name(format));
|
||||
}
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
const auto format = buffer.dfmt;
|
||||
const auto num_format = buffer.nfmt;
|
||||
|
||||
switch (format) {
|
||||
case AmdGpu::DataFormat::FormatInvalid:
|
||||
return;
|
||||
case AmdGpu::DataFormat::Format8_8_8_8:
|
||||
case AmdGpu::DataFormat::Format16:
|
||||
case AmdGpu::DataFormat::Format32:
|
||||
case AmdGpu::DataFormat::Format32_32:
|
||||
case AmdGpu::DataFormat::Format32_32_32_32: {
|
||||
ASSERT(N == AmdGpu::NumComponents(format));
|
||||
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
|
||||
|
||||
Id packed_value{};
|
||||
for (u32 i = 0; i < N; i++) {
|
||||
const u32 bit_width = AmdGpu::ComponentBits(format, i);
|
||||
const u32 bit_offset = AmdGpu::ComponentOffset(format, i) % 32;
|
||||
|
||||
const Id comp{ConvertF32ToFormat(
|
||||
ctx, N == 1 ? value : ctx.OpCompositeExtract(ctx.F32[1], value, i), num_format,
|
||||
bit_width)};
|
||||
|
||||
if (bit_width == 32) {
|
||||
if constexpr (N == 1) {
|
||||
ctx.OpStore(ptr, comp);
|
||||
} else {
|
||||
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id,
|
||||
ctx.u32_zero_value, index_i);
|
||||
ctx.OpStore(ptr, comp);
|
||||
}
|
||||
} else {
|
||||
if (i == 0) {
|
||||
packed_value = comp;
|
||||
} else {
|
||||
packed_value =
|
||||
ctx.OpBitFieldInsert(ctx.U32[1], packed_value, comp,
|
||||
ctx.ConstU32(bit_offset), ctx.ConstU32(bit_width));
|
||||
}
|
||||
|
||||
if (i == N - 1) {
|
||||
ctx.OpStore(ptr, packed_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
|
||||
}
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
|
||||
Id value) {
|
||||
EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
|
||||
Id value) {
|
||||
EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
|
||||
Id value) {
|
||||
EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value);
|
||||
const auto& buffer = ctx.texture_buffers[handle];
|
||||
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
|
||||
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
|
||||
if (buffer.is_integer) {
|
||||
value = ctx.OpBitcast(ctx.U32[4], value);
|
||||
}
|
||||
ctx.OpImageWrite(tex_buffer, coord, value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -41,13 +41,14 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding_)
|
||||
: Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
|
||||
stage{program.info.stage}, binding{binding_} {
|
||||
EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_)
|
||||
: Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage},
|
||||
binding{binding_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces();
|
||||
DefineBuffers();
|
||||
DefineTextureBuffers();
|
||||
DefineImagesAndSamplers();
|
||||
DefineSharedMemory();
|
||||
}
|
||||
|
@ -123,25 +124,24 @@ void EmitContext::DefineInterfaces() {
|
|||
DefineOutputs();
|
||||
}
|
||||
|
||||
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||
const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||
switch (fmt) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32[4];
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return ctx.U32[4];
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
return ctx.F32;
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32;
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return ctx.U32;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
|
||||
|
@ -162,7 +162,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
default:
|
||||
break;
|
||||
}
|
||||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
void EmitContext::DefineBufferOffsets() {
|
||||
|
@ -177,6 +177,16 @@ void EmitContext::DefineBufferOffsets() {
|
|||
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
|
||||
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
|
||||
}
|
||||
for (auto& tex_buffer : texture_buffers) {
|
||||
const u32 binding = tex_buffer.binding;
|
||||
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
|
||||
const u32 comp = (binding & 0xf) >> 2;
|
||||
const u32 offset = (binding & 0x3) << 3;
|
||||
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
|
||||
push_data_block, ConstU32(half), ConstU32(comp))};
|
||||
const Id value{OpLoad(U32[1], ptr)};
|
||||
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
|
||||
}
|
||||
}
|
||||
|
||||
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||
|
@ -195,6 +205,11 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|||
}
|
||||
|
||||
void EmitContext::DefineInputs() {
|
||||
if (info.uses_lane_id) {
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
switch (stage) {
|
||||
case Stage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
|
@ -202,7 +217,7 @@ void EmitContext::DefineInputs() {
|
|||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
|
||||
for (const auto& input : info.vs_inputs) {
|
||||
const Id type{GetAttributeType(*this, input.fmt)};
|
||||
const Id type{GetAttributeType(*this, input.fmt)[4]};
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
|
||||
|
@ -229,15 +244,12 @@ void EmitContext::DefineInputs() {
|
|||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
for (const auto& input : info.ps_inputs) {
|
||||
const u32 semantic = input.param_index;
|
||||
if (input.is_default) {
|
||||
if (input.is_default && !input.is_flat) {
|
||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
|
||||
F32[1], 4, true};
|
||||
continue;
|
||||
|
@ -328,47 +340,74 @@ void EmitContext::DefinePushDataBlock() {
|
|||
|
||||
void EmitContext::DefineBuffers() {
|
||||
boost::container::small_vector<Id, 8> type_ids;
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
const Id record_array_type{buffer.is_storage
|
||||
? TypeRuntimeArray(data_type)
|
||||
: TypeArray(data_type, ConstU32(buffer.length))};
|
||||
const auto define_struct = [&](Id record_array_type, bool is_instance_data) {
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
const auto name =
|
||||
buffer.is_instance_data
|
||||
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
|
||||
sizeof(float) * CHAR_BIT)
|
||||
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||
Name(struct_type, name);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
type_ids.push_back(record_array_type);
|
||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
|
||||
return struct_type;
|
||||
}
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
const auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
|
||||
: fmt::format("{}_cbuf_block_f32", stage);
|
||||
Name(struct_type, name);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
type_ids.push_back(record_array_type);
|
||||
return struct_type;
|
||||
};
|
||||
|
||||
for (const auto& desc : info.buffers) {
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const bool is_storage = desc.IsStorage(sharp);
|
||||
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
|
||||
: TypeArray(data_type, ConstU32(sharp.NumDwords()))};
|
||||
const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
|
||||
|
||||
const auto storage_class =
|
||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||
const Id pointer_type = TypePointer(storage_class, data_type);
|
||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("{}_{}", buffer.is_storage ? "ssbo" : "cbuf", buffer.sgpr_base));
|
||||
if (is_storage && !desc.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
}
|
||||
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
|
||||
|
||||
buffers.push_back({
|
||||
.id = id,
|
||||
.binding = binding++,
|
||||
.data_types = data_types,
|
||||
.pointer_type = pointer_type,
|
||||
.dfmt = buffer.dfmt,
|
||||
.nfmt = buffer.nfmt,
|
||||
.stride = buffer.GetVsharp(info).GetStride(),
|
||||
});
|
||||
interfaces.push_back(id);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineTextureBuffers() {
|
||||
for (const auto& desc : info.texture_buffers) {
|
||||
const bool is_integer =
|
||||
desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
|
||||
const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
|
||||
const u32 sampled = desc.is_written ? 2 : 1;
|
||||
const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
|
||||
sampled, spv::ImageFormat::Unknown)};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
|
||||
texture_buffers.push_back({
|
||||
.id = id,
|
||||
.binding = binding++,
|
||||
.image_type = image_type,
|
||||
.result_type = sampled_type[4],
|
||||
.is_integer = is_integer,
|
||||
});
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -447,7 +486,7 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||
|
||||
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
||||
const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
|
||||
const auto format = desc.is_storage ? GetFormat(image) : spv::ImageFormat::Unknown;
|
||||
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
|
||||
const u32 sampled = desc.is_storage ? 2 : 1;
|
||||
switch (desc.type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
|
@ -470,17 +509,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
|
||||
void EmitContext::DefineImagesAndSamplers() {
|
||||
for (const auto& image_desc : info.images) {
|
||||
const VectorIds* data_types = [&] {
|
||||
switch (image_desc.nfmt) {
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return &U32;
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return &S32;
|
||||
default:
|
||||
return &F32;
|
||||
}
|
||||
}();
|
||||
const Id sampled_type = data_types->Get(1);
|
||||
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
|
||||
const Id sampled_type = data_types[1];
|
||||
const Id image_type{ImageType(*this, image_desc, sampled_type)};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
|
||||
|
@ -489,7 +519,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
|
||||
image_desc.dword_offset));
|
||||
images.push_back({
|
||||
.data_types = data_types,
|
||||
.data_types = &data_types,
|
||||
.id = id,
|
||||
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
|
||||
.pointer_type = pointer_type,
|
||||
|
@ -498,13 +528,12 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
interfaces.push_back(id);
|
||||
++binding;
|
||||
}
|
||||
|
||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||
|
||||
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
|
||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||
}
|
||||
if (info.samplers.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
sampler_type = TypeSampler();
|
||||
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
|
||||
for (const auto& samp_desc : info.samplers) {
|
||||
|
@ -520,14 +549,15 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory() {
|
||||
static constexpr size_t DefaultSharedMemSize = 16_KB;
|
||||
static constexpr size_t DefaultSharedMemSize = 2_KB;
|
||||
if (!info.uses_shared) {
|
||||
return;
|
||||
}
|
||||
if (info.shared_memory_size == 0) {
|
||||
info.shared_memory_size = DefaultSharedMemSize;
|
||||
u32 shared_memory_size = info.shared_memory_size;
|
||||
if (shared_memory_size == 0) {
|
||||
shared_memory_size = DefaultSharedMemSize;
|
||||
}
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
|
||||
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
|
|
|
@ -36,7 +36,7 @@ struct VectorIds {
|
|||
|
||||
class EmitContext final : public Sirit::Module {
|
||||
public:
|
||||
explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
|
||||
explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding);
|
||||
~EmitContext();
|
||||
|
||||
Id Def(const IR::Value& value);
|
||||
|
@ -124,7 +124,7 @@ public:
|
|||
return ConstantComposite(type, constituents);
|
||||
}
|
||||
|
||||
Info& info;
|
||||
const Info& info;
|
||||
const Profile& profile;
|
||||
Stage stage{};
|
||||
|
||||
|
@ -207,13 +207,19 @@ public:
|
|||
u32 binding;
|
||||
const VectorIds* data_types;
|
||||
Id pointer_type;
|
||||
AmdGpu::DataFormat dfmt;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
u32 stride;
|
||||
};
|
||||
struct TextureBufferDefinition {
|
||||
Id id;
|
||||
Id coord_offset;
|
||||
u32 binding;
|
||||
Id image_type;
|
||||
Id result_type;
|
||||
bool is_integer;
|
||||
};
|
||||
|
||||
u32& binding;
|
||||
boost::container::small_vector<BufferDefinition, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
|
||||
boost::container::small_vector<TextureDefinition, 8> images;
|
||||
boost::container::small_vector<Id, 4> samplers;
|
||||
|
||||
|
@ -238,6 +244,7 @@ private:
|
|||
void DefineOutputs();
|
||||
void DefinePushDataBlock();
|
||||
void DefineBuffers();
|
||||
void DefineTextureBuffers();
|
||||
void DefineImagesAndSamplers();
|
||||
void DefineSharedMemory();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue