Tessellation (#1528)

* shader_recompiler: Tessellation WIP

* fix compiler errors after merge

DONT MERGE set log file to /dev/null

DONT MERGE linux pthread bb fix

save work

DONT MERGE dump ir

save more work

fix mistake with ES shader

skip list

add input patch control points dynamic state

random stuff

* WIP Tessellation partial implementation. Squash commits

* test: make local/tcs use attr arrays

* attr arrays in TCS/TES

* dont define empty attr arrays

* switch to special opcodes for tess tcs/tes reads and tcs writes

* impl tcs/tes read attr insts

* rebase fix

* save some work

* save work probably broken and slow

* put Vertex LogicalStage after TCS and TES to fix bindings

* more refactors

* refactor pattern matching and optimize modulos (disabled)

* enable modulo opt

* copyright

* rebase fixes

* remove some prints

* remove some stuff

* Add TCS/TES support for shader patching and use LogicalStage

* refactor and handle wider DS instructions

* get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR

* stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs

* delete some more stuff

* update comments for current implementation

* some cleanup

* uint error

* more cleanup

* remove patch control points dynamic state (because runtime_info already depends on it)

* fix potential problem with determining passthrough

---------

Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
This commit is contained in:
baggins183 2024-12-14 02:56:17 -08:00 committed by GitHub
parent 3e22622508
commit 3c0c921ef5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
54 changed files with 2146 additions and 189 deletions

View file

@ -1,6 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <span>
#include <type_traits>
#include <utility>
@ -13,6 +12,7 @@
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
namespace Shader::Backend::SPIRV {
@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
return arg.VectorReg();
} else if constexpr (std::is_same_v<ArgType, const char*>) {
return arg.StringLiteral();
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
return arg.Patch();
}
UNREACHABLE();
}
template <auto func, bool is_first_arg_inst, size_t... I>
@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
return main;
}
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
switch (primitive) {
case AmdGpu::TessellationType::Isoline:
return spv::ExecutionMode::Isolines;
case AmdGpu::TessellationType::Triangle:
return spv::ExecutionMode::Triangles;
case AmdGpu::TessellationType::Quad:
return spv::ExecutionMode::Quads;
}
UNREACHABLE_MSG("Tessellation primitive {}", primitive);
}
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
switch (spacing) {
case AmdGpu::TessellationPartitioning::Integer:
return spv::ExecutionMode::SpacingEqual;
case AmdGpu::TessellationPartitioning::FracOdd:
return spv::ExecutionMode::SpacingFractionalOdd;
case AmdGpu::TessellationPartitioning::FracEven:
return spv::ExecutionMode::SpacingFractionalEven;
default:
break;
}
UNREACHABLE_MSG("Tessellation spacing {}", spacing);
}
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
@ -248,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.uses_group_ballot) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
}
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
const auto stage = info.l_stage;
if (stage == LogicalStage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if (info.stage == Stage::Geometry) {
if (stage == LogicalStage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry);
}
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
}
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
}
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const auto& info = program.info;
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.info.stage) {
case Stage::Compute: {
switch (info.l_stage) {
case LogicalStage::Compute: {
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
execution_model = spv::ExecutionModel::GLCompute;
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
workgroup_size[1], workgroup_size[2]);
break;
}
case Stage::Export:
case Stage::Vertex:
case LogicalStage::Vertex:
execution_model = spv::ExecutionModel::Vertex;
break;
case Stage::Fragment:
case LogicalStage::TessellationControl:
execution_model = spv::ExecutionModel::TessellationControl;
ctx.AddCapability(spv::Capability::Tessellation);
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
ctx.runtime_info.hs_info.NumOutputControlPoints());
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
const auto& vs_info = ctx.runtime_info.vs_info;
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
ctx.AddExecutionMode(main,
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
? spv::ExecutionMode::VertexOrderCcw
: spv::ExecutionMode::VertexOrderCw);
break;
}
case LogicalStage::Fragment:
execution_model = spv::ExecutionModel::Fragment;
if (ctx.profile.lower_left_origin_mode) {
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
@ -292,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
break;
case Stage::Geometry:
case LogicalStage::Geometry:
execution_model = spv::ExecutionModel::Geometry;
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
ctx.AddExecutionMode(main,
@ -303,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.runtime_info.gs_info.num_invocations);
break;
default:
throw NotImplementedException("Stage {}", u32(program.info.stage));
UNREACHABLE_MSG("Stage {}", u32(info.stage));
}
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
@ -349,7 +397,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
const IR::Program& program, Bindings& binding) {
EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
DefineEntryPoint(program.info, ctx, main);
SetupCapabilities(program.info, profile, ctx);
SetupFloatMode(ctx, profile, runtime_info, main);
PatchPhiNodes(program, ctx);

View file

@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
void EmitBarrier(EmitContext& ctx) {
const auto execution{spv::Scope::Workgroup};
const auto memory{spv::Scope::Workgroup};
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
spv::MemorySemanticsMask::WorkgroupMemory};
spv::Scope memory;
spv::MemorySemanticsMask memory_semantics;
if (ctx.l_stage == Shader::LogicalStage::TessellationControl) {
memory = spv::Scope::Invocation;
memory_semantics = spv::MemorySemanticsMask::MaskNone;
} else {
memory = spv::Scope::Workgroup;
memory_semantics =
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory;
}
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
ctx.ConstU32(static_cast<u32>(memory)),
ctx.ConstU32(static_cast<u32>(memory_semantics)));

View file

@ -4,6 +4,9 @@
#include "common/assert.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/runtime_info.h"
#include <magic_enum/magic_enum.hpp>
@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
return info.id;
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
ctx.ConstU32(element));
} else {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
const auto& info{ctx.output_params.at(attr_index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
return info.id;
} else {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
}
}
}
if (IR::IsMrt(attr)) {
@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
return {ctx.F32[1], false};
} else {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
}
}
if (IR::IsMrt(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
UNREACHABLE();
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.stage == Stage::Geometry) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (ctx.info.l_stage == LogicalStage::Geometry) {
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval) {
if (IR::IsTessCoord(attr)) {
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
} else if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
if (IR::IsParam(attr)) {
@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
}
return coord;
}
case IR::Attribute::TessellationEvaluationPointU:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
case IR::Attribute::TessellationEvaluationPointV:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
default:
throw NotImplementedException("Read attribute {}", attr);
UNREACHABLE_MSG("Read attribute {}", attr);
}
}
@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
ASSERT(ctx.info.stage == Stage::Geometry);
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
case IR::Attribute::InvocationId:
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
case IR::Attribute::PatchVertices:
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
case IR::Attribute::PackedHullInvocationInfo: {
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
// [0:8]: patch id within VGT
// [8:12]: output control point id
// But 0:8 should be treated as 0 for attribute addressing purposes
if (ctx.runtime_info.hs_info.IsPassthrough()) {
// Gcn shader would run with 1 thread, but we need to run a thread for
// each output control point.
// If Gcn shader uses this value, we should make sure all threads in the
// Vulkan shader use 0
return ctx.ConstU32(0u);
} else {
const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
}
}
default:
throw NotImplementedException("Read U32 attribute {}", attr);
UNREACHABLE_MSG("Read U32 attribute {}", attr);
}
}
@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
}
}
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
vertex_index, attr_index, comp_index));
}
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
// Implied vertex index is invocation_id
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
Id pointer =
ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
ctx.OpStore(pointer, value);
}
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
const u32 index{IR::GenericPatchIndex(patch)};
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
: ctx.input_f32};
const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
return ctx.OpLoad(ctx.F32[1], pointer);
}
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
const Id pointer{[&] {
if (IR::IsGeneric(patch)) {
const u32 index{IR::GenericPatchIndex(patch)};
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
}
switch (patch) {
case IR::Patch::TessellationLodLeft:
case IR::Patch::TessellationLodRight:
case IR::Patch::TessellationLodTop:
case IR::Patch::TessellationLodBottom: {
const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
const Id index_id{ctx.ConstU32(index)};
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
}
case IR::Patch::TessellationLodInteriorU:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
ctx.u32_zero_value);
case IR::Patch::TessellationLodInteriorV:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
default:
UNREACHABLE_MSG("Patch {}", u32(patch));
}
}()};
ctx.OpStore(pointer, value);
}
template <u32 N>
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];

View file

@ -9,6 +9,7 @@
namespace Shader::IR {
enum class Attribute : u64;
enum class ScalarReg : u32;
enum class Patch : u64;
class Inst;
class Value;
} // namespace Shader::IR
@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);

View file

@ -6,6 +6,7 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
#include <boost/container/static_vector.hpp>
@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) {
case Stage::Compute:
return "cs";
}
throw InvalidArgument("Invalid stage {}", u32(stage));
UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
}
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
const Info& info_, Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, binding{binding_} {
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
@ -268,9 +269,8 @@ void EmitContext::DefineInputs() {
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
switch (l_stage) {
case LogicalStage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
@ -311,12 +311,11 @@ void EmitContext::DefineInputs() {
}
input_params[attrib.semantic] =
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
interfaces.push_back(id);
}
}
break;
}
case Stage::Fragment:
case LogicalStage::Fragment:
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
@ -351,15 +350,14 @@ void EmitContext::DefineInputs() {
}
input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
interfaces.push_back(attr_id);
}
break;
case Stage::Compute:
case LogicalStage::Compute:
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
local_invocation_id =
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
break;
case Stage::Geometry: {
case LogicalStage::Geometry: {
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const auto gl_per_vertex =
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
@ -389,15 +387,129 @@ void EmitContext::DefineInputs() {
}
break;
}
case LogicalStage::TessellationControl: {
invocation_id =
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
patch_vertices =
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
input_attr_array = DefineInput(patch_array_type, 0);
Name(input_attr_array, "in_attrs");
}
break;
}
case LogicalStage::TessellationEval: {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
input_attr_array = DefineInput(patch_array_type, 0);
Name(input_attr_array, "in_attrs");
}
u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4;
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineInput(F32[4], patch_base_location + index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_in{}", index));
patches[index] = id;
}
break;
}
default:
break;
}
}
void EmitContext::DefineOutputs() {
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
switch (l_stage) {
case LogicalStage::Vertex: {
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
// Might cause problems linking with tcs
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
info.stores.Get(IR::Attribute::Position3);
if (has_extra_pos_stores) {
const Id type{TypeArray(F32[1], ConstU32(8U))};
clip_distances =
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4;
if (num_attrs > 0) {
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
output_attr_array = DefineOutput(type, 0);
Name(output_attr_array, "out_attrs");
}
} else {
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
}
}
break;
}
case LogicalStage::TessellationControl: {
if (info.stores_tess_level_outer) {
const Id type{TypeArray(F32[1], ConstU32(4U))};
output_tess_level_outer =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
Decorate(output_tess_level_outer, spv::Decoration::Patch);
}
if (info.stores_tess_level_inner) {
const Id type{TypeArray(F32[1], ConstU32(2U))};
output_tess_level_inner =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
Decorate(output_tess_level_inner, spv::Decoration::Patch);
}
const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{TypeArray(
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
output_attr_array = DefineOutput(patch_array_type, 0);
Name(output_attr_array, "out_attrs");
}
u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4;
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineOutput(F32[4], patch_base_location + index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_out{}", index));
patches[index] = id;
}
break;
}
case LogicalStage::TessellationEval: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
@ -419,11 +531,10 @@ void EmitContext::DefineOutputs() {
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
interfaces.push_back(id);
}
break;
}
case Stage::Fragment:
case LogicalStage::Fragment:
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
@ -435,22 +546,22 @@ void EmitContext::DefineOutputs() {
const Id id{DefineOutput(type, i)};
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
interfaces.push_back(id);
}
break;
case Stage::Geometry: {
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
const Id id{DefineOutput(F32[4], attr_id)};
Name(id, fmt::format("out_attr{}", attr_id));
output_params[attr_id] = {id, output_f32, F32[1], 4u};
interfaces.push_back(id);
}
break;
}
default:
case LogicalStage::Compute:
break;
default:
UNREACHABLE();
}
}

View file

@ -46,14 +46,18 @@ public:
void DefineBufferOffsets();
void DefineInterpolatedAttribs();
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
Decorate(input_id, spv::Decoration::Location, location);
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
std::optional<spv::BuiltIn> builtin = std::nullopt) {
const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
if (location) {
Decorate(input_id, spv::Decoration::Location, *location);
}
return input_id;
}
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
std::optional<spv::BuiltIn> builtin = std::nullopt) {
const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
if (location) {
Decorate(output_id, spv::Decoration::Location, *location);
}
@ -131,7 +135,8 @@ public:
const Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage{};
Stage stage;
LogicalStage l_stage{};
Id void_id{};
Id U8{};
@ -188,8 +193,15 @@ public:
Id clip_distances{};
Id cull_distances{};
Id patch_vertices{};
Id output_tess_level_outer{};
Id output_tess_level_inner{};
Id tess_coord;
std::array<Id, 30> patches{};
Id workgroup_id{};
Id local_invocation_id{};
Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
Id subgroup_local_invocation_id{};
Id image_u32{};
@ -252,6 +264,8 @@ public:
bool is_loaded{};
s32 buffer_handle{-1};
};
Id input_attr_array;
Id output_attr_array;
std::array<SpirvAttribute, IR::NumParams> input_params{};
std::array<SpirvAttribute, IR::NumParams> output_params{};
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};