// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
#include "video_core/buffer_cache/buffer_cache.h"

#include <boost/container/static_vector.hpp>
#include <fmt/format.h>

#include <bit>
#include <cmath>
#include <numbers>
#include <string_view>

namespace Shader::Backend::SPIRV {
namespace {

std::string_view StageName(Stage stage) {
    switch (stage) {
    case Stage::Vertex:
        return "vs";
    case Stage::Local:
        return "ls";
    case Stage::Export:
        return "es";
    case Stage::Hull:
        return "hs";
    case Stage::Geometry:
        return "gs";
    case Stage::Fragment:
        return "fs";
    case Stage::Compute:
        return "cs";
    }
    UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
}

static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
    switch (type) {
    case AmdGpu::PrimitiveType::PointList:
        return 1u;
    case AmdGpu::PrimitiveType::LineList:
    case AmdGpu::PrimitiveType::LineStrip:
        return 2u;
    case AmdGpu::PrimitiveType::TriangleList:
    case AmdGpu::PrimitiveType::TriangleStrip:
    case AmdGpu::PrimitiveType::RectList:
        return 3u;
    case AmdGpu::PrimitiveType::AdjTriangleList:
        return 6u;
    case AmdGpu::PrimitiveType::AdjLineList:
        return 4u;
    default:
        UNREACHABLE();
    }
}

template <typename... Args>
void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
    ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
                                 std::forward<Args>(args)...)
                         .c_str());
}

} // Anonymous namespace

EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, Info& info_,
                         Bindings& binding_)
    : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
      profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
    if (info.dma_types != IR::Type::Void) {
        SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
    } else {
        SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
    }

    AddCapability(spv::Capability::Shader);
    DefineArithmeticTypes();
    DefineInterfaces();
    DefineSharedMemory();
    DefineBuffers();
    DefineImagesAndSamplers();
    DefineFunctions();
}

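// The PhysicalStorageBuffer64 addressing model is only selected when the shader performs
// DMA-style accesses through guest pointers; per the SPIR-V spec it also requires the
// PhysicalStorageBufferAddresses capability (VK_KHR_buffer_device_address), which is
// assumed to be declared where the physical pointer types are actually used.
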
EmitContext::~EmitContext() = default;

Id EmitContext::Def(const IR::Value& value) {
    if (!value.IsImmediate()) {
        return value.InstRecursive()->Definition<Id>();
    }
    switch (value.Type()) {
    case IR::Type::Void:
        return Id{};
    case IR::Type::U1:
        return value.U1() ? true_value : false_value;
    case IR::Type::U32:
        return ConstU32(value.U32());
    case IR::Type::U64:
        return Constant(U64, value.U64());
    case IR::Type::F32:
        return ConstF32(value.F32());
    case IR::Type::F64:
        return Constant(F64[1], value.F64());
    case IR::Type::StringLiteral:
        return String(value.StringLiteral());
    default:
        throw NotImplementedException("Immediate type {}", value.Type());
    }
}

void EmitContext::DefineArithmeticTypes() {
    void_id = Name(TypeVoid(), "void_id");
    U1[1] = Name(TypeBool(), "bool_id");
    U8 = Name(TypeUInt(8), "u8_id");
    U16 = Name(TypeUInt(16), "u16_id");
    if (info.uses_fp16) {
        F16[1] = Name(TypeFloat(16), "f16_id");
    }
    if (info.uses_fp64) {
        F64[1] = Name(TypeFloat(64), "f64_id");
    }
    F32[1] = Name(TypeFloat(32), "f32_id");
    S32[1] = Name(TypeSInt(32), "i32_id");
    U32[1] = Name(TypeUInt(32), "u32_id");
    U64 = Name(TypeUInt(64), "u64_id");

    for (u32 i = 2; i <= 4; i++) {
        if (info.uses_fp16) {
            F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
        }
        if (info.uses_fp64) {
            F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
        }
        F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
        S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));
        U32[i] = Name(TypeVector(U32[1], i), fmt::format("u32vec{}_id", i));
        U1[i] = Name(TypeVector(U1[1], i), fmt::format("bvec{}_id", i));
    }

    true_value = ConstantTrue(U1[1]);
    false_value = ConstantFalse(U1[1]);
    u8_one_value = Constant(U8, 1U);
    u8_zero_value = Constant(U8, 0U);
    u16_zero_value = Constant(U16, 0U);
    u32_one_value = ConstU32(1U);
    u32_zero_value = ConstU32(0U);
    f32_zero_value = ConstF32(0.0f);
    u64_one_value = Constant(U64, 1ULL);
    u64_zero_value = Constant(U64, 0ULL);

    pi_x2 = ConstF32(2.0f * float{std::numbers::pi});

    input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
    input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
    input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32");

    output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
    output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
    output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");

    full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
    full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
    frexp_result_f32 = Name(TypeStruct(F32[1], S32[1]), "frexp_result_f32");
    if (info.uses_fp64) {
        frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
    }

    if (True(info.dma_types & IR::Type::F64)) {
        physical_pointer_types[PointerType::F64] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
    }
    if (True(info.dma_types & IR::Type::U64)) {
        physical_pointer_types[PointerType::U64] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
    }
    if (True(info.dma_types & IR::Type::F32)) {
        physical_pointer_types[PointerType::F32] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
    }
    if (True(info.dma_types & IR::Type::U32)) {
        physical_pointer_types[PointerType::U32] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
    }
    if (True(info.dma_types & IR::Type::F16)) {
        physical_pointer_types[PointerType::F16] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
    }
    if (True(info.dma_types & IR::Type::U16)) {
        physical_pointer_types[PointerType::U16] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
    }
    if (True(info.dma_types & IR::Type::U8)) {
        physical_pointer_types[PointerType::U8] =
            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
    }
}

void EmitContext::DefineInterfaces() {
    DefinePushDataBlock();
    DefineInputs();
    DefineOutputs();
}

const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
    switch (GetNumberClass(fmt)) {
    case AmdGpu::NumberClass::Float:
        return ctx.F32;
    case AmdGpu::NumberClass::Sint:
        return ctx.S32;
    case AmdGpu::NumberClass::Uint:
        return ctx.U32;
    default:
        break;
    }
    UNREACHABLE_MSG("Invalid attribute type {}", fmt);
}

EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
                                                          u32 num_components, bool output) {
    switch (GetNumberClass(fmt)) {
    case AmdGpu::NumberClass::Float:
        return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
    case AmdGpu::NumberClass::Uint:
        return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
    case AmdGpu::NumberClass::Sint:
        return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
    default:
        break;
    }
    UNREACHABLE_MSG("Invalid attribute type {}", fmt);
}

Id EmitContext::GetBufferSize(const u32 sharp_idx) {
    // Can this be done with memory access? Like we do now with ReadConst
    const auto& srt_flatbuf = buffers[flatbuf_index];
    ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];

    const auto rsrc1{
        OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
    const auto rsrc2{
        OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 2)))};

    const auto stride{OpBitFieldUExtract(U32[1], rsrc1, ConstU32(16u), ConstU32(14u))};
    const auto num_records{rsrc2};

    const auto stride_zero{OpIEqual(U1[1], stride, u32_zero_value)};
    const auto stride_size{OpIMul(U32[1], num_records, stride)};
    return OpSelect(U32[1], stride_zero, num_records, stride_size);
}

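// Worked example: a V# with stride = 16 and num_records = 256 describes a structured
// buffer of 16 * 256 = 4096 bytes, while stride = 0 marks a raw buffer whose
// num_records field already holds the size in bytes. The stride field occupies bits
// [16:29] of the sharp's second dword, hence the 14-bit extract above.
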
void EmitContext::DefineBufferProperties() {
    for (u32 i = 0; i < buffers.size(); i++) {
        BufferDefinition& buffer = buffers[i];
        if (buffer.buffer_type != BufferType::Guest) {
            continue;
        }
        const u32 binding = buffer.binding;
        const u32 half = PushData::BufOffsetIndex + (binding >> 4);
        const u32 comp = (binding & 0xf) >> 2;
        const u32 offset = (binding & 0x3) << 3;
        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
                                   push_data_block, ConstU32(half), ConstU32(comp))};
        const Id value{OpLoad(U32[1], ptr)};
        buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
        Name(buffer.offset, fmt::format("buf{}_off", binding));
        buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
        Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));

        // Only need to load size if performing bounds checks and the buffer is both guest and not
        // inline.
        if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
            const BufferResource& desc = info.buffers[i];
            if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
                buffer.size = ConstU32(desc.inline_cbuf.GetSize());
            } else {
                buffer.size = GetBufferSize(desc.sharp_idx);
            }
            Name(buffer.size, fmt::format("buf{}_size", binding));
            buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
            Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
            buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
            Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
            buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
            Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
        }
    }
}

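// The push-constant packing read above stores one 8-bit base offset per guest buffer,
// four to a dword and sixteen per uvec4 member: e.g. binding 21 resolves to
// half = PushData::BufOffsetIndex + 1, comp = 1 and bit offset 8, i.e. bits [8:15] of
// buf_offsets1.y.
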
void EmitContext::DefineInterpolatedAttribs() {
    if (!profile.needs_manual_interpolation) {
        return;
    }
    // Iterate all input attributes, load them and manually interpolate.
    for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
        const auto& input = runtime_info.fs_info.inputs[i];
        auto& params = input_params[i];
        if (input.is_flat || params.is_loaded) {
            continue;
        }
        const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)};
        const Id p0{OpCompositeExtract(F32[4], p_array, 0U)};
        const Id p1{OpCompositeExtract(F32[4], p_array, 1U)};
        const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
        const Id p10{OpFSub(F32[4], p1, p0)};
        const Id p20{OpFSub(F32[4], p2, p0)};
        const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
                                               ? bary_coord_linear_id
                                               : bary_coord_persp_id)};
        const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
        const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
        const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
        const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)};
        params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z));
        Name(params.id, fmt::format("fs_in_attr{}", i));
        params.is_loaded = true;
    }
}

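// The loop above evaluates attr = p0 + b1 * (p1 - p0) + b2 * (p2 - p0), where
// (b0, b1, b2) are the barycentric weights exposed through BaryCoordKHR /
// BaryCoordNoPerspKHR; this reproduces fixed-function attribute interpolation from
// per-vertex values on profiles that request manual interpolation.
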
void EmitContext::DefineWorkgroupIndex() {
    const Id workgroup_id_val{OpLoad(U32[3], workgroup_id)};
    const Id workgroup_x{OpCompositeExtract(U32[1], workgroup_id_val, 0)};
    const Id workgroup_y{OpCompositeExtract(U32[1], workgroup_id_val, 1)};
    const Id workgroup_z{OpCompositeExtract(U32[1], workgroup_id_val, 2)};
    const Id num_workgroups{OpLoad(U32[3], num_workgroups_id)};
    const Id num_workgroups_x{OpCompositeExtract(U32[1], num_workgroups, 0)};
    const Id num_workgroups_y{OpCompositeExtract(U32[1], num_workgroups, 1)};
    workgroup_index_id =
        OpIAdd(U32[1], OpIAdd(U32[1], workgroup_x, OpIMul(U32[1], workgroup_y, num_workgroups_x)),
               OpIMul(U32[1], workgroup_z, OpIMul(U32[1], num_workgroups_x, num_workgroups_y)));
    Name(workgroup_index_id, "workgroup_index");
}

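// Flattened grid position: workgroup_index = x + y * Nx + z * (Nx * Ny), with Nx and Ny
// loaded from the NumWorkgroups builtin.
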
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
    switch (default_value) {
    case 0:
        return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
    case 1:
        return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
    case 2:
        return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
    case 3:
        return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
    default:
        UNREACHABLE();
    }
}

void EmitContext::DefineInputs() {
    if (info.uses_lane_id) {
        subgroup_local_invocation_id = DefineVariable(
            U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
        Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
    }
    switch (l_stage) {
    case LogicalStage::Vertex: {
        vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
        base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
        instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);

        const auto fetch_shader = Gcn::ParseFetchShader(info);
        if (!fetch_shader) {
            break;
        }
        for (const auto& attrib : fetch_shader->attributes) {
            ASSERT(attrib.semantic < IR::NumParams);
            const auto sharp = attrib.GetSharp(info);
            const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
            if (attrib.UsesStepRates()) {
                const u32 rate_idx =
                    attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
                                                                                                : 1;
                const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
                const auto buffer =
                    std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
                        return buffer.instance_attrib == attrib.semantic;
                    });
                // Note that we pass index rather than Id
                input_params[attrib.semantic] = SpirvAttribute{
                    .id = {rate_idx},
                    .pointer_type = input_u32,
                    .component_type = U32[1],
                    .num_components = std::min<u16>(attrib.num_elements, num_components),
                    .is_integer = true,
                    .is_loaded = false,
                    .buffer_handle = int(buffer - info.buffers.begin()),
                };
            } else {
                Id id{DefineInput(type, attrib.semantic)};
                if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
                    Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
                } else {
                    Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
                }
                input_params[attrib.semantic] =
                    GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
            }
        }
        break;
    }
    case LogicalStage::Fragment:
        if (info.loads.GetAny(IR::Attribute::FragCoord)) {
            frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
        }
        if (info.stores.Get(IR::Attribute::Depth)) {
            frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
        }
        if (info.loads.Get(IR::Attribute::IsFrontFace)) {
            front_facing =
                DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
        }
        if (profile.needs_manual_interpolation) {
            if (info.has_perspective_interp) {
                bary_coord_persp_id =
                    DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
            }
            if (info.has_linear_interp) {
                bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
                                                      spv::StorageClass::Input);
            }
        }
        for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
            const auto& input = runtime_info.fs_info.inputs[i];
            if (input.IsDefault()) {
                input_params[i] = {
                    .id = MakeDefaultValue(*this, input.default_value),
                    .pointer_type = input_f32,
                    .component_type = F32[1],
                    .num_components = 4,
                    .is_integer = false,
                    .is_loaded = true,
                };
                continue;
            }
            const IR::Attribute param{IR::Attribute::Param0 + i};
            const u32 num_components = info.loads.NumComponents(param);
            const Id type{F32[num_components]};
            Id attr_id{};
            if (profile.needs_manual_interpolation && !input.is_flat) {
                attr_id = DefineInput(TypeArray(type, ConstU32(3U)), input.param_index);
                Decorate(attr_id, spv::Decoration::PerVertexKHR);
                Name(attr_id, fmt::format("fs_in_attr{}_p", i));
            } else {
                attr_id = DefineInput(type, input.param_index);
                Name(attr_id, fmt::format("fs_in_attr{}", i));

                if (input.is_flat) {
                    Decorate(attr_id, spv::Decoration::Flat);
                } else if (IsLinear(info.interp_qualifiers[i])) {
                    Decorate(attr_id, spv::Decoration::NoPerspective);
                }
            }
            input_params[i] =
                GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
        }
        break;
    case LogicalStage::Compute:
        if (info.loads.GetAny(IR::Attribute::WorkgroupIndex) ||
            info.loads.GetAny(IR::Attribute::WorkgroupId)) {
            workgroup_id =
                DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
        }
        if (info.loads.GetAny(IR::Attribute::WorkgroupIndex)) {
            num_workgroups_id =
                DefineVariable(U32[3], spv::BuiltIn::NumWorkgroups, spv::StorageClass::Input);
        }
        if (info.loads.GetAny(IR::Attribute::LocalInvocationId)) {
            local_invocation_id =
                DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
        }
        break;
    case LogicalStage::Geometry: {
        primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
        const auto gl_per_vertex =
            Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
                 "gl_PerVertex");
        MemberName(gl_per_vertex, 0, "gl_Position");
        MemberName(gl_per_vertex, 1, "gl_PointSize");
        MemberName(gl_per_vertex, 2, "gl_ClipDistance");
        MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn,
                       static_cast<std::uint32_t>(spv::BuiltIn::Position));
        MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn,
                       static_cast<std::uint32_t>(spv::BuiltIn::PointSize));
        MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn,
                       static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance));
        Decorate(gl_per_vertex, spv::Decoration::Block);
        const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive);
        const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in));
        gl_in = Name(DefineVar(vertices_in, spv::StorageClass::Input), "gl_in");
        interfaces.push_back(gl_in);

        const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u;
        for (int param_id = 0; param_id < num_params; ++param_id) {
            const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
            const Id id{DefineInput(type, param_id)};
            Name(id, fmt::format("gs_in_attr{}", param_id));
            input_params[param_id] = {id, input_f32, F32[1], 4};
        }
        break;
    }
    case LogicalStage::TessellationControl: {
        invocation_id =
            DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
        patch_vertices =
            DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
        primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);

        const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.ls_stride, 16) >> 4;
        if (num_attrs > 0) {
            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
            // The input vertex count isn't statically known, so make length 32 (what glslang does)
            const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
            input_attr_array = DefineInput(patch_array_type, 0);
            Name(input_attr_array, "in_attrs");
        }
        break;
    }
    case LogicalStage::TessellationEval: {
        tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
        primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);

        const u32 num_attrs = Common::AlignUp(runtime_info.vs_info.hs_output_cp_stride, 16) >> 4;
        if (num_attrs > 0) {
            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
            // The input vertex count isn't statically known, so make length 32 (what glslang does)
            const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
            input_attr_array = DefineInput(patch_array_type, 0);
            Name(input_attr_array, "in_attrs");
        }

        const u32 patch_base_location = num_attrs;
        for (size_t index = 0; index < 30; ++index) {
            if (!(info.uses_patches & (1U << index))) {
                continue;
            }
            const Id id{DefineInput(F32[4], patch_base_location + index)};
            Decorate(id, spv::Decoration::Patch);
            Name(id, fmt::format("patch_in{}", index));
            patches[index] = id;
        }
        break;
    }
    default:
        break;
    }
}

void EmitContext::DefineOutputs() {
    switch (l_stage) {
    case LogicalStage::Vertex: {
        // No point in defining builtin outputs (i.e. position) unless next stage is fragment?
        // Might cause problems linking with tcs

        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
        const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
                                          info.stores.Get(IR::Attribute::Position2) ||
                                          info.stores.Get(IR::Attribute::Position3);
        if (has_extra_pos_stores) {
            const Id type{TypeArray(F32[1], ConstU32(8U))};
            clip_distances =
                DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
            cull_distances =
                DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
        }
        if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
            const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
            if (num_attrs > 0) {
                const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
                output_attr_array = DefineOutput(type, 0);
                Name(output_attr_array, "out_attrs");
            }
        } else {
            for (u32 i = 0; i < IR::NumParams; i++) {
                const IR::Attribute param{IR::Attribute::Param0 + i};
                if (!info.stores.GetAny(param)) {
                    continue;
                }
                const u32 num_components = info.stores.NumComponents(param);
                const Id id{DefineOutput(F32[num_components], i)};
                Name(id, fmt::format("out_attr{}", i));
                output_params[i] =
                    GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
            }
        }
        break;
    }
    case LogicalStage::TessellationControl: {
        if (info.stores_tess_level_outer) {
            const Id type{TypeArray(F32[1], ConstU32(4U))};
            output_tess_level_outer =
                DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
            Decorate(output_tess_level_outer, spv::Decoration::Patch);
        }
        if (info.stores_tess_level_inner) {
            const Id type{TypeArray(F32[1], ConstU32(2U))};
            output_tess_level_inner =
                DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
            Decorate(output_tess_level_inner, spv::Decoration::Patch);
        }

        const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.hs_output_cp_stride, 16) >> 4;
        if (num_attrs > 0) {
            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
            // Unlike on the input side, the output control point count is statically known.
            const Id patch_array_type{TypeArray(
                per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
            output_attr_array = DefineOutput(patch_array_type, 0);
            Name(output_attr_array, "out_attrs");
        }

        const u32 patch_base_location = num_attrs;
        for (size_t index = 0; index < 30; ++index) {
            if (!(info.uses_patches & (1U << index))) {
                continue;
            }
            const Id id{DefineOutput(F32[4], patch_base_location + index)};
            Decorate(id, spv::Decoration::Patch);
            Name(id, fmt::format("patch_out{}", index));
            patches[index] = id;
        }
        break;
    }
    case LogicalStage::TessellationEval: {
        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
        const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
                                          info.stores.Get(IR::Attribute::Position2) ||
                                          info.stores.Get(IR::Attribute::Position3);
        if (has_extra_pos_stores) {
            const Id type{TypeArray(F32[1], ConstU32(8U))};
            clip_distances =
                DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
            cull_distances =
                DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
        }
        for (u32 i = 0; i < IR::NumParams; i++) {
            const IR::Attribute param{IR::Attribute::Param0 + i};
            if (!info.stores.GetAny(param)) {
                continue;
            }
            const u32 num_components = info.stores.NumComponents(param);
            const Id id{DefineOutput(F32[num_components], i)};
            Name(id, fmt::format("out_attr{}", i));
            output_params[i] =
                GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
        }
        break;
    }
    case LogicalStage::Fragment: {
        u32 num_render_targets = 0;
        for (u32 i = 0; i < IR::NumRenderTargets; i++) {
            const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
            if (!info.stores.GetAny(mrt)) {
                continue;
            }
            const u32 num_components = info.stores.NumComponents(mrt);
            const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
            const Id type{GetAttributeType(*this, num_format)[num_components]};
            Id id;
            if (runtime_info.fs_info.dual_source_blending) {
                id = DefineOutput(type, 0);
                Decorate(id, spv::Decoration::Index, i);
            } else {
                id = DefineOutput(type, i);
            }
            Name(id, fmt::format("frag_color{}", i));
            frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
            ++num_render_targets;
        }
        ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
                   "Dual source blending requires exactly two MRT exports");
        break;
    }
    case LogicalStage::Geometry: {
        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);

        for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
            const Id id{DefineOutput(F32[4], attr_id)};
            Name(id, fmt::format("out_attr{}", attr_id));
            output_params[attr_id] = {id, output_f32, F32[1], 4u};
        }
        break;
    }
    case LogicalStage::Compute:
        break;
    default:
        UNREACHABLE();
    }
}

void EmitContext::DefinePushDataBlock() {
    // Create push constants block for instance step rates
    const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
                                         U32[4], U32[4], U32[4], U32[4], U32[4]),
                              "AuxData")};
    Decorate(struct_type, spv::Decoration::Block);
    MemberName(struct_type, PushData::Step0Index, "sr0");
    MemberName(struct_type, PushData::Step1Index, "sr1");
    MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
    MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
    MemberName(struct_type, PushData::XScaleIndex, "xscale");
    MemberName(struct_type, PushData::YScaleIndex, "yscale");
    MemberName(struct_type, PushData::UdRegsIndex + 0, "ud_regs0");
    MemberName(struct_type, PushData::UdRegsIndex + 1, "ud_regs1");
    MemberName(struct_type, PushData::UdRegsIndex + 2, "ud_regs2");
    MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
    MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
    MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
    MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
    MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
    MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
    MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U);
    MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U);
    MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U);
    MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U);
    MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U);
    MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U);
    MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
    MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
    MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
    push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
    Name(push_data_block, "push_data");
    interfaces.push_back(push_data_block);
}

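// With the explicit offsets above, the block spans 120 bytes (buf_offsets1 ends at
// 104 + 16), comfortably inside the 128-byte minimum Vulkan guarantees for
// maxPushConstantsSize.
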
EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_written, u32 elem_shift,
                                                 BufferType buffer_type, Id data_type) {
    // Define array type.
    const Id max_num_items = ConstU32(u32(profile.max_ubo_size) >> elem_shift);
    const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
                                          : TypeArray(data_type, max_num_items)};
    // Define block struct type. Don't perform decorations twice on the same Id.
    const Id struct_type{TypeStruct(record_array_type)};
    if (std::ranges::find(buf_type_ids, record_array_type.value, &Id::value) ==
        buf_type_ids.end()) {
        Decorate(record_array_type, spv::Decoration::ArrayStride, 1 << elem_shift);
        Decorate(struct_type, spv::Decoration::Block);
        MemberName(struct_type, 0, "data");
        MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
        buf_type_ids.push_back(record_array_type);
    }
    // Define buffer binding interface.
    const auto storage_class =
        is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
    const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
    const Id pointer_type = TypePointer(storage_class, data_type);
    const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
    Decorate(id, spv::Decoration::Binding, binding.unified);
    Decorate(id, spv::Decoration::DescriptorSet, 0U);
    if (is_storage && !is_written) {
        Decorate(id, spv::Decoration::NonWritable);
    }
    switch (buffer_type) {
    case Shader::BufferType::GdsBuffer:
        Name(id, "gds_buffer");
        break;
    case Shader::BufferType::Flatbuf:
        Name(id, "srt_flatbuf");
        break;
    case Shader::BufferType::BdaPagetable:
        Name(id, "bda_pagetable");
        break;
    case Shader::BufferType::FaultBuffer:
        Name(id, "fault_buffer");
        break;
    case Shader::BufferType::SharedMemory:
        Name(id, "ssbo_shmem");
        break;
    default:
        Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer));
        break;
    }
    interfaces.push_back(id);
    return {id, pointer_type};
}

void EmitContext::DefineBuffers() {
    if (!profile.supports_robust_buffer_access &&
        info.readconst_types == Info::ReadConstType::None) {
        // In case Flatbuf has not already been bound by IR and is needed
        // to query buffer sizes, bind it now.
        info.buffers.push_back({
            .used_types = IR::Type::U32,
            // We can't guarantee that flatbuf will not grow past UBO
            // limit if there are a lot of ReadConsts. (We could specialize)
            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
            .buffer_type = BufferType::Flatbuf,
        });
        // In the future we may want to read buffer sizes from GPU memory if available.
        // info.readconst_types |= Info::ReadConstType::Immediate;
    }
    for (const auto& desc : info.buffers) {
        const auto buf_sharp = desc.GetSharp(info);
        const bool is_storage = desc.IsStorage(buf_sharp, profile);

        // Set indexes for special buffers.
        if (desc.buffer_type == BufferType::Flatbuf) {
            flatbuf_index = buffers.size();
        } else if (desc.buffer_type == BufferType::BdaPagetable) {
            bda_pagetable_index = buffers.size();
        } else if (desc.buffer_type == BufferType::FaultBuffer) {
            fault_buffer_index = buffers.size();
        }

        // Define aliases depending on the shader usage.
        auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
        if (True(desc.used_types & IR::Type::U64)) {
            spv_buffer[PointerType::U64] =
                DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
        }
        if (True(desc.used_types & IR::Type::U32)) {
            spv_buffer[PointerType::U32] =
                DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
        }
        if (True(desc.used_types & IR::Type::F32)) {
            spv_buffer[PointerType::F32] =
                DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
        }
        if (True(desc.used_types & IR::Type::U16)) {
            spv_buffer[PointerType::U16] =
                DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
        }
        if (True(desc.used_types & IR::Type::U8)) {
            spv_buffer[PointerType::U8] =
                DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
        }
        ++binding.unified;
    }
}

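// All typed views of one guest buffer share the descriptor binding recorded in
// binding.unified, which is only incremented after every alias has been emitted; a
// single bound Vulkan buffer can therefore be accessed as u8, u16, u32, f32 or u64
// without rebinding.
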
spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
        return spv::ImageFormat::R32ui;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) {
        return spv::ImageFormat::R32i;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::R32f;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::Rg32f;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
        return spv::ImageFormat::Rg32ui;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
        return spv::ImageFormat::Rgba32ui;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::R16f;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
        return spv::ImageFormat::R16ui;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::Rg16f;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm) {
        return spv::ImageFormat::Rg16Snorm;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
        return spv::ImageFormat::Rg8;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::Rgba16f;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
        return spv::ImageFormat::Rgba16;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
        return spv::ImageFormat::R8;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
        return spv::ImageFormat::Rgba8;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
        return spv::ImageFormat::Rgba8ui;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::R11fG11fB10f;
    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::Rgba32f;
    }
    UNREACHABLE_MSG("Unknown storage format data_format={}, num_format={}", image.GetDataFmt(),
                    image.GetNumberFmt());
}

Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
    const auto image = desc.GetSharp(ctx.info);
    const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
    const auto type = image.GetViewType(desc.is_array);
    const u32 sampled = desc.is_written ? 2 : 1;
    switch (type) {
    case AmdGpu::ImageType::Color1D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Color1DArray:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, sampled, format);
    case AmdGpu::ImageType::Color2D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Color2DArray:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format);
    case AmdGpu::ImageType::Color2DMsaa:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
    case AmdGpu::ImageType::Color3D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
    default:
        break;
    }
    throw InvalidArgument("Invalid texture type {}", type);
}

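// Per the SPIR-V spec, a Sampled operand of 1 declares a sampled image and 2 a storage
// image, hence written images get 2. Only atomic accesses need the concrete texel
// format from GetFormat; everything else can remain ImageFormat::Unknown.
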
void EmitContext::DefineImagesAndSamplers() {
    for (const auto& image_desc : info.images) {
        const auto sharp = image_desc.GetSharp(info);
        const auto nfmt = sharp.GetNumberFmt();
        const bool is_integer = AmdGpu::IsInteger(nfmt);
        const bool is_storage = image_desc.is_written;
        const VectorIds& data_types = GetAttributeType(*this, nfmt);
        const Id sampled_type = data_types[1];
        const Id image_type{ImageType(*this, image_desc, sampled_type)};
        const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
        Decorate(id, spv::Decoration::Binding, binding.unified++);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
        images.push_back({
            .data_types = &data_types,
            .id = id,
            .sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
            .pointer_type = pointer_type,
            .image_type = image_type,
            .view_type = sharp.GetViewType(image_desc.is_array),
            .is_integer = is_integer,
            .is_storage = is_storage,
        });
        interfaces.push_back(id);
    }
    if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
        image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
        image_f32 = TypePointer(spv::StorageClass::Image, F32[1]);
    }
    if (info.samplers.empty()) {
        return;
    }
    sampler_type = TypeSampler();
    sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
    for (const auto& samp_desc : info.samplers) {
        const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
        Decorate(id, spv::Decoration::Binding, binding.unified++);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        Name(id, fmt::format("{}_{}{}", stage, "samp", samp_desc.sharp_idx));
        samplers.push_back(id);
        interfaces.push_back(id);
    }
}

void EmitContext::DefineSharedMemory() {
    const auto num_types = std::popcount(static_cast<u32>(info.shared_types));
    if (num_types == 0) {
        return;
    }
    ASSERT(info.stage == Stage::Compute);
    const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;

    const auto make_type = [&](IR::Type type, Id element_type, u32 element_size,
                               std::string_view name) {
        if (False(info.shared_types & type)) {
            // Skip unused shared memory types.
            return std::make_tuple(Id{}, Id{}, Id{});
        }

        const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
        const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
        Decorate(array_type, spv::Decoration::ArrayStride, element_size);

        const Id struct_type{TypeStruct(array_type)};
        MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);

        const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
        const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
        const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
        Name(variable, name);
        interfaces.push_back(variable);

        if (num_types > 1) {
            Decorate(struct_type, spv::Decoration::Block);
            Decorate(variable, spv::Decoration::Aliased);
        }

        return std::make_tuple(variable, element_pointer, pointer);
    };
    std::tie(shared_memory_u16, shared_u16, shared_memory_u16_type) =
        make_type(IR::Type::U16, U16, 2u, "shared_mem_u16");
    std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) =
        make_type(IR::Type::U32, U32[1], 4u, "shared_mem_u32");
    std::tie(shared_memory_u64, shared_u64, shared_memory_u64_type) =
        make_type(IR::Type::U64, U64, 8u, "shared_mem_u64");
}

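// When more than one element type is in use, the arrays above alias the same workgroup
// memory, hence the Block and Aliased decorations; this explicit layout relies on
// VK_KHR_workgroup_memory_explicit_layout, which the profile is assumed to guarantee.
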
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
    const auto func_type{TypeFunction(U32[1], F32[1])};
    const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto value{OpFunctionParameter(F32[1])};
    Name(func, name);
    AddLabel();

    const auto raw_value{OpBitcast(U32[1], value)};
    const auto exponent{
        OpBitcast(S32[1], OpBitFieldSExtract(U32[1], raw_value, ConstU32(23U), ConstU32(8U)))};
    const auto sign{OpBitFieldUExtract(U32[1], raw_value, ConstU32(31U), ConstU32(1U))};

    const auto is_zero{OpLogicalOr(U1[1], OpIEqual(U1[1], raw_value, ConstU32(0U)),
                                   OpIEqual(U1[1], sign, ConstU32(1U)))};
    const auto is_nan{OpIsNan(U1[1], value)};
    const auto is_inf{OpIsInf(U1[1], value)};
    const auto is_denorm{OpSLessThanEqual(U1[1], exponent, ConstS32(-15))};

    const auto denorm_mantissa{OpConvertFToU(
        U32[1],
        OpRoundEven(F32[1], OpFMul(F32[1], value,
                                   ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))))};
    const auto denorm_overflow{
        OpINotEqual(U1[1], OpShiftRightLogical(U32[1], denorm_mantissa, ConstU32(mantissa_bits)),
                    ConstU32(0U))};
    const auto denorm{
        OpSelect(U32[1], denorm_overflow, ConstU32(1U << mantissa_bits), denorm_mantissa)};

    const auto norm_mantissa{OpConvertFToU(
        U32[1],
        OpRoundEven(F32[1],
                    OpLdexp(F32[1], value,
                            OpISub(S32[1], ConstS32(static_cast<int>(mantissa_bits)), exponent))))};
    const auto norm_overflow{
        OpUGreaterThanEqual(U1[1], norm_mantissa, ConstU32(2U << mantissa_bits))};
    const auto norm_final_mantissa{OpBitwiseAnd(
        U32[1],
        OpSelect(U32[1], norm_overflow, OpShiftRightLogical(U32[1], norm_mantissa, ConstU32(1U)),
                 norm_mantissa),
        ConstU32((1U << mantissa_bits) - 1))};
    const auto norm_final_exponent{OpBitcast(
        U32[1],
        OpIAdd(S32[1],
               OpSelect(S32[1], norm_overflow, OpIAdd(S32[1], exponent, ConstS32(1)), exponent),
               ConstS32(15)))};
    const auto norm{OpBitFieldInsert(U32[1], norm_final_mantissa, norm_final_exponent,
                                     ConstU32(mantissa_bits), ConstU32(5U))};

    const auto result{OpSelect(U32[1], is_zero, ConstU32(0U),
                               OpSelect(U32[1], is_nan, ConstU32(31u << mantissa_bits | 1U),
                                        OpSelect(U32[1], is_inf, ConstU32(31U << mantissa_bits),
                                                 OpSelect(U32[1], is_denorm, denorm, norm))))};

    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}

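// Targets are the packed unsigned-float components of B10G11R11: uf11 has a 5-bit
// exponent and a 6-bit mantissa, uf10 a 5-bit exponent and a 5-bit mantissa, and
// neither has a sign bit, which is why negative inputs collapse to zero together with
// actual zeros in the encoder above.
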
Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view name) {
    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
    const auto func_type{TypeFunction(F32[1], U32[1])};
    const auto func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto value{OpFunctionParameter(U32[1])};
    Name(func, name);
    AddLabel();

    const auto raw_mantissa{
        OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
    const auto mantissa{OpConvertUToF(F32[1], raw_mantissa)};
    const auto exponent{OpBitcast(
        S32[1], OpBitFieldSExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U)))};

    const auto is_exp_neg_one{OpIEqual(U1[1], exponent, ConstS32(-1))};
    const auto is_exp_zero{OpIEqual(U1[1], exponent, ConstS32(0))};

    const auto is_zero{OpIEqual(U1[1], value, ConstU32(0u))};
    const auto is_nan{
        OpLogicalAnd(U1[1], is_exp_neg_one, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
    const auto is_inf{
        OpLogicalAnd(U1[1], is_exp_neg_one, OpIEqual(U1[1], raw_mantissa, ConstU32(0u)))};
    const auto is_denorm{
        OpLogicalAnd(U1[1], is_exp_zero, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};

    const auto denorm{OpFMul(F32[1], mantissa, ConstF32(1.f / (1 << 20)))};
    const auto norm{OpLdexp(
        F32[1],
        OpFAdd(F32[1],
               OpFMul(F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << mantissa_bits))),
               ConstF32(1.f)),
        exponent)};

    const auto result{OpSelect(F32[1], is_zero, ConstF32(0.f),
                               OpSelect(F32[1], is_nan, ConstF32(NAN),
                                        OpSelect(F32[1], is_inf, ConstF32(INFINITY),
                                                 OpSelect(F32[1], is_denorm, denorm, norm))))};

    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}

Id EmitContext::DefineGetBdaPointer() {
    const auto caching_pagebits{
        Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS))};
    const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};

    const auto func_type{TypeFunction(U64, U64)};
    const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
    const auto address{OpFunctionParameter(U64)};
    Name(func, "get_bda_pointer");
    AddLabel();

    const auto fault_label{OpLabel()};
    const auto available_label{OpLabel()};
    const auto merge_label{OpLabel()};

    // Get page BDA
    const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
    const auto page32{OpUConvert(U32[1], page)};
    const auto& bda_buffer{buffers[bda_pagetable_index]};
    const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
    const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
    const auto bda{OpLoad(U64, bda_ptr)};

    // Check if page is GPU cached
    const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
    OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
    OpBranchConditional(is_fault, fault_label, available_label);

    // First time access, mark as fault
    AddLabel(fault_label);
    const auto& fault_buffer{buffers[fault_buffer_index]};
    const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
    const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
    const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
    const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
    const auto fault_ptr{
        OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)};
    const auto fault_value{OpLoad(U8, fault_ptr)};
    const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)};
    OpStore(fault_ptr, fault_value_masked);

    // Return null pointer
    const auto fallback_result{u64_zero_value};
    OpBranch(merge_label);

    // Value is available, compute address
    AddLabel(available_label);
    const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
    const auto addr{OpIAdd(U64, bda, offset_in_bda)};
    OpBranch(merge_label);

    // Merge
    AddLabel(merge_label);
    const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}

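// Rough host-side sketch of the lookup emitted above (bda_pagetable and fault_buffer
// are the SSBOs defined in DefineBuffers; the constants come from
// VideoCore::BufferCache):
//
//   u64 get_bda_pointer(u64 va) {
//       const u64 page = va >> CACHING_PAGEBITS;
//       const u64 bda = bda_pagetable[page];
//       if (bda == 0) {                                    // page not resident yet
//           fault_buffer[page / 8] |= u8(1) << (page % 8); // flag it for host readback
//           return 0;                                      // caller sees a null pointer
//       }
//       return bda + (va & (CACHING_PAGESIZE - 1));
//   }
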
Id EmitContext::DefineReadConst(bool dynamic) {
    const auto func_type{!dynamic ? TypeFunction(U32[1], U32[2], U32[1], U32[1])
                                  : TypeFunction(U32[1], U32[2], U32[1])};
    const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto base{OpFunctionParameter(U32[2])};
    const auto offset{OpFunctionParameter(U32[1])};
    const auto flatbuf_offset{!dynamic ? OpFunctionParameter(U32[1]) : Id{}};
    Name(func, dynamic ? "read_const_dynamic" : "read_const");
    AddLabel();

    const auto base_lo{OpUConvert(U64, OpCompositeExtract(U32[1], base, 0))};
    const auto base_hi{OpUConvert(U64, OpCompositeExtract(U32[1], base, 1))};
    const auto base_shift{OpShiftLeftLogical(U64, base_hi, ConstU32(32U))};
    const auto base_addr{OpBitwiseOr(U64, base_lo, base_shift)};
    const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
    const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};

    const auto result = EmitMemoryRead(U32[1], addr, [&]() {
        if (dynamic) {
            return u32_zero_value;
        } else {
            const auto& flatbuf_buffer{buffers[flatbuf_index]};
            ASSERT(flatbuf_buffer.binding >= 0 &&
                   flatbuf_buffer.buffer_type == BufferType::Flatbuf);
            const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
            const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
                                         flatbuf_offset)};
            return OpLoad(U32[1], ptr);
        }
    });

    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}

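// read_const receives the 64-bit guest base as two dwords plus a dword offset; when the
// pointed-to memory cannot be read, the fallback lambda reloads the value from the
// flatbuf copy of the shader resource tables at the statically captured flatbuf_offset,
// while read_const_dynamic has no such anchor and yields zero instead.
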
void EmitContext::DefineFunctions() {
    if (info.uses_pack_10_11_11) {
        f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
        f32_to_uf10 = DefineFloat32ToUfloatM5(5, "f32_to_uf10");
    }
    if (info.uses_unpack_10_11_11) {
        uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
        uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
    }
    if (info.dma_types != IR::Type::Void) {
        get_bda_pointer = DefineGetBdaPointer();
    }

    if (True(info.readconst_types & Info::ReadConstType::Immediate)) {
        LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses immediate ReadConst", info.pgm_hash);
        read_const = DefineReadConst(false);
    }
    if (True(info.readconst_types & Info::ReadConstType::Dynamic)) {
        LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses dynamic ReadConst", info.pgm_hash);
        read_const_dynamic = DefineReadConst(true);
    }
}

} // namespace Shader::Backend::SPIRV