Tessellation (#1528)

* shader_recompiler: Tessellation WIP

* fix compiler errors after merge

DONT MERGE set log file to /dev/null

DONT MERGE linux pthread bb fix

save work

DONT MERGE dump ir

save more work

fix mistake with ES shader

skip list

add input patch control points dynamic state

random stuff

* WIP Tessellation partial implementation. Squash commits

* test: make local/tcs use attr arrays

* attr arrays in TCS/TES

* don't define empty attr arrays

* switch to special opcodes for tess tcs/tes reads and tcs writes

* impl tcs/tes read attr insts

* rebase fix

* save some work

* save work probably broken and slow

* put Vertex LogicalStage after TCS and TES to fix bindings

* more refactors

* refactor pattern matching and optimize modulos (disabled)

* enable modulo opt

* copyright

* rebase fixes

* remove some prints

* remove some stuff

* Add TCS/TES support for shader patching and use LogicalStage

* refactor and handle wider DS instructions

* get rid of GetAttributes for special tess constant reads. Immediately replace some upon seeing readconstbuffer, which gets rid of some extra passes over the IR

* stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs

* delete some more stuff

* update comments for current implementation

* some cleanup

* uint error

* more cleanup

* remove patch control points dynamic state (because runtime_info already depends on it)

* fix potential problem with determining passthrough

---------

Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
This commit is contained in:
baggins183 2024-12-14 02:56:17 -08:00 committed by GitHub
parent 3e22622508
commit 3c0c921ef5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
54 changed files with 2146 additions and 189 deletions

View file

@ -8,6 +8,8 @@
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
@ -34,9 +36,8 @@ void Translator::EmitPrologue() {
}
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (info.stage) {
case Stage::Vertex:
case Stage::Export:
switch (info.l_stage) {
case LogicalStage::Vertex:
// v0: vertex ID, always present
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
// v1: instance ID, step rate 0
@ -52,7 +53,7 @@ void Translator::EmitPrologue() {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
}
break;
case Stage::Fragment:
case LogicalStage::Fragment:
dst_vreg = IR::VectorReg::V0;
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
++dst_vreg; // I
@ -122,7 +123,30 @@ void Translator::EmitPrologue() {
}
}
break;
case Stage::Compute:
case LogicalStage::TessellationControl: {
// Should be laid out like:
// [0:8]: patch id within VGT
// [8:12]: output control point id
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
// TODO PrimitiveId is probably V2 but haven't seen it yet
break;
}
case LogicalStage::TessellationEval:
ir.SetVectorReg(IR::VectorReg::V0,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
// V2 is similar to PrimitiveID but not the same. It seems to only be used in
// compiler-generated address calculations. It's probably the patch id within the
// patches running locally on a given VGT (or CU, whichever is the granularity of LDS
// memory)
// Set to 0. See explanation in comment describing hull/domain passes
ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
// V3 is the actual PrimitiveID as intended by the shader author.
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
case LogicalStage::Compute:
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
@ -137,7 +161,7 @@ void Translator::EmitPrologue() {
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
}
break;
case Stage::Geometry:
case LogicalStage::Geometry:
switch (runtime_info.gs_info.out_primitive[0]) {
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
@ -152,7 +176,7 @@ void Translator::EmitPrologue() {
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
default:
throw NotImplementedException("Unknown shader stage");
UNREACHABLE_MSG("Unknown shader stage");
}
}
@ -503,7 +527,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
info.stage == Stage::Local);
translator.EmitFetch(inst);
continue;
}