Initial support of Geometry shaders (#1244)

* video_core: initial GS support

* fix for components mapping; missing prim type
This commit is contained in:
psucien 2024-10-06 00:26:50 +02:00 committed by GitHub
parent 5bb45dc7ba
commit 927bb0c175
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 944 additions and 268 deletions

View file

@ -5,6 +5,7 @@
#include <type_traits>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "common/func_traits.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@ -12,10 +13,38 @@
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "video_core/amdgpu/types.h"
namespace Shader::Backend::SPIRV {
namespace {
static constexpr spv::ExecutionMode GetInputPrimitiveType(AmdGpu::PrimitiveType type) {
switch (type) {
case AmdGpu::PrimitiveType::PointList:
return spv::ExecutionMode::InputPoints;
case AmdGpu::PrimitiveType::LineList:
return spv::ExecutionMode::InputLines;
case AmdGpu::PrimitiveType::TriangleList:
case AmdGpu::PrimitiveType::TriangleStrip:
return spv::ExecutionMode::Triangles;
default:
UNREACHABLE();
}
}
static constexpr spv::ExecutionMode GetOutputPrimitiveType(AmdGpu::GsOutputPrimitiveType type) {
switch (type) {
case AmdGpu::GsOutputPrimitiveType::PointList:
return spv::ExecutionMode::OutputVertices;
case AmdGpu::GsOutputPrimitiveType::LineStrip:
return spv::ExecutionMode::OutputLineStrip;
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
return spv::ExecutionMode::OutputTriangleStrip;
default:
UNREACHABLE();
}
}
template <auto func, typename... Args>
void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
@ -222,6 +251,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
workgroup_size[1], workgroup_size[2]);
break;
}
case Stage::Export:
case Stage::Vertex:
execution_model = spv::ExecutionModel::Vertex;
break;
@ -240,6 +270,16 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
break;
case Stage::Geometry:
execution_model = spv::ExecutionModel::Geometry;
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
ctx.AddExecutionMode(main,
GetOutputPrimitiveType(ctx.runtime_info.gs_info.out_primitive[0]));
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
ctx.runtime_info.gs_info.output_vertices);
ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations,
ctx.runtime_info.gs_info.num_invocations);
break;
default:
throw NotImplementedException("Stage {}", u32(program.info.stage));
}
@ -270,11 +310,20 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
if (program.info.stage == Stage::Vertex) {
switch (program.info.stage) {
case Stage::Export:
case Stage::Vertex:
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
break;
case Stage::Geometry:
ctx.AddCapability(spv::Capability::Geometry);
break;
default:
break;
}
PatchPhiNodes(program, ctx);
binding.user_data += program.info.ud_mask.NumRegs();
return ctx.Assemble();
}

View file

@ -46,6 +46,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
return info.id;
} else {
@ -164,7 +165,30 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.stage == Stage::Geometry) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index),
ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
@ -232,6 +256,9 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
case IR::Attribute::IsFrontFace:
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
ASSERT(ctx.info.stage == Stage::Geometry);
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
default:
throw NotImplementedException("Read U32 attribute {}", attr);
}

View file

@ -27,7 +27,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
@ -85,7 +84,7 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
@ -409,4 +408,7 @@ Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);
void EmitEmitVertex(EmitContext& ctx);
void EmitEmitPrimitive(EmitContext& ctx);
} // namespace Shader::Backend::SPIRV

View file

@ -41,6 +41,14 @@ void EmitDiscardCond(EmitContext& ctx, Id condition) {
ctx.AddLabel(merge_label);
}
void EmitEmitVertex(EmitContext& ctx) {
ctx.OpEmitVertex();
}
void EmitEmitPrimitive(EmitContext& ctx) {
ctx.OpEndPrimitive();
}
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
throw NotImplementedException("Geometry streams");
}

View file

@ -1,8 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "video_core/amdgpu/types.h"
#include <boost/container/static_vector.hpp>
#include <fmt/format.h>
@ -32,6 +34,19 @@ std::string_view StageName(Stage stage) {
throw InvalidArgument("Invalid stage {}", u32(stage));
}
static constexpr u32 NumVertices(AmdGpu::GsOutputPrimitiveType type) {
switch (type) {
case AmdGpu::GsOutputPrimitiveType::PointList:
return 1u;
case AmdGpu::GsOutputPrimitiveType::LineStrip:
return 2u;
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
return 3u;
default:
UNREACHABLE();
}
}
template <typename... Args>
void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
@ -222,6 +237,7 @@ void EmitContext::DefineInputs() {
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
@ -290,6 +306,38 @@ void EmitContext::DefineInputs() {
local_invocation_id =
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
break;
case Stage::Geometry: {
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const auto gl_per_vertex =
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
"gl_PerVertex");
MemberName(gl_per_vertex, 0, "gl_Position");
MemberName(gl_per_vertex, 1, "gl_PointSize");
MemberName(gl_per_vertex, 2, "gl_ClipDistance");
MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn,
static_cast<std::uint32_t>(spv::BuiltIn::Position));
MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn,
static_cast<std::uint32_t>(spv::BuiltIn::PointSize));
MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn,
static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance));
Decorate(gl_per_vertex, spv::Decoration::Block);
const auto vertices_in =
TypeArray(gl_per_vertex, ConstU32(NumVertices(runtime_info.gs_info.out_primitive[0])));
gl_in = Name(DefineVar(vertices_in, spv::StorageClass::Input), "gl_in");
interfaces.push_back(gl_in);
const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u;
for (int param_id = 0; param_id < num_params; ++param_id) {
const IR::Attribute param{IR::Attribute::Param0 + param_id};
const Id type{
TypeArray(F32[4], ConstU32(NumVertices(runtime_info.gs_info.out_primitive[0])))};
const Id id{DefineInput(type, param_id)};
Name(id, fmt::format("in_attr{}", param_id));
input_params[param_id] = {id, input_f32, F32[1], 4};
interfaces.push_back(id);
}
break;
}
default:
break;
}
@ -297,6 +345,7 @@ void EmitContext::DefineInputs() {
void EmitContext::DefineOutputs() {
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
@ -338,6 +387,18 @@ void EmitContext::DefineOutputs() {
interfaces.push_back(id);
}
break;
case Stage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 attr_id = 0; attr_id < runtime_info.gs_info.copy_data.num_attrs; attr_id++) {
const IR::Attribute param{IR::Attribute::Param0 + attr_id};
const Id id{DefineOutput(F32[4], attr_id)};
Name(id, fmt::format("out_attr{}", attr_id));
output_params[attr_id] = {id, output_f32, F32[1], 4u};
interfaces.push_back(id);
}
break;
}
default:
break;
}

View file

@ -168,9 +168,12 @@ public:
Id output_f32{};
Id output_s32{};
Id gl_in{};
boost::container::small_vector<Id, 16> interfaces;
Id output_position{};
Id primitive_id{};
Id vertex_index{};
Id instance_id{};
Id push_data_block{};

View file

@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/ir/attribute.h"
namespace Shader {
CopyShaderData ParseCopyShader(const std::span<const u32>& code) {
Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()};
Gcn::GcnDecodeContext decoder;
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
std::array<s32, 32> offsets{};
std::fill(offsets.begin(), offsets.end(), -1);
CopyShaderData data{};
Gcn::OperandField sgpr{};
auto last_attr{IR::Attribute::Position0};
s32 soffset{0};
while (!code_slice.atEnd()) {
auto inst = decoder.decodeInstruction(code_slice);
switch (inst.opcode) {
case Gcn::Opcode::S_MOVK_I32: {
sgpr = inst.dst[0].field;
soffset = inst.control.sopk.simm;
break;
}
case Gcn::Opcode::EXP: {
const auto& exp = inst.control.exp;
const IR::Attribute semantic = static_cast<IR::Attribute>(exp.target);
for (int i = 0; i < inst.src_count; ++i) {
const auto ofs = offsets[inst.src[i].code];
if (ofs != -1) {
data.attr_map[ofs] = {semantic, i};
if (semantic > last_attr) {
last_attr = semantic;
}
}
}
break;
}
case Gcn::Opcode::BUFFER_LOAD_DWORD: {
offsets[inst.src[1].code] = inst.control.mubuf.offset;
if (inst.src[3].field != Gcn::OperandField::ConstZero) {
ASSERT(inst.src[3].field == sgpr);
offsets[inst.src[1].code] += soffset;
}
break;
}
default:
break;
}
}
if (last_attr != IR::Attribute::Position0) {
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
}
return data;
}
} // namespace Shader

View file

@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <unordered_map>
#include "common/types.h"
#include "shader_recompiler/ir/attribute.h"
namespace Shader {
struct CopyShaderData {
std::unordered_map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
u32 num_attrs{0};
};
CopyShaderData ParseCopyShader(const std::span<const u32>& code);
} // namespace Shader

View file

@ -2491,4 +2491,23 @@ enum class ImageAddrComponent : u32 {
Clamp,
};
struct SendMsgSimm {
enum class Message : u32 {
Interrupt = 1,
Gs = 2,
GsDone = 3,
System = 15,
};
enum class GsOp : u32 {
Nop = 0,
Cut = 1,
Emit = 2,
EmitCut = 3,
};
Message msg : 4;
GsOp op : 2;
};
} // namespace Shader::Gcn

View file

@ -55,12 +55,6 @@ void Translator::EmitDataShare(const GcnInst& inst) {
}
}
// SOPP
void Translator::S_BARRIER() {
ir.Barrier();
}
// VOP2
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {

View file

@ -540,14 +540,6 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
}
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
const IR::ScalarReg dst{inst.dst[0].code};
ir.SetScalarReg(dst, ir.Imm32(pc));
ir.SetScalarReg(dst + 1, ir.Imm32(0));
}
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination

View file

@ -0,0 +1,75 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn {
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_BARRIER:
return S_BARRIER();
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
return;
case Opcode::S_GETPC_B64:
return S_GETPC_B64(pc, inst);
case Opcode::S_WAITCNT:
case Opcode::S_NOP:
case Opcode::S_ENDPGM:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1:
case Opcode::S_CBRANCH_VCCNZ:
case Opcode::S_CBRANCH_VCCZ:
case Opcode::S_CBRANCH_EXECNZ:
case Opcode::S_BRANCH:
return;
case Opcode::S_SENDMSG:
S_SENDMSG(inst);
return;
default:
UNREACHABLE();
}
}
void Translator::S_BARRIER() {
ir.Barrier();
}
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
const IR::ScalarReg dst{inst.dst[0].code};
ir.SetScalarReg(dst, ir.Imm32(pc));
ir.SetScalarReg(dst + 1, ir.Imm32(0));
}
void Translator::S_SENDMSG(const GcnInst& inst) {
const auto& simm = reinterpret_cast<const SendMsgSimm&>(inst.control.sopp.simm);
switch (simm.msg) {
case SendMsgSimm::Message::Gs: {
switch (simm.op) {
case SendMsgSimm::GsOp::Nop:
break;
case SendMsgSimm::GsOp::Cut:
ir.EmitPrimitive();
break;
case SendMsgSimm::GsOp::Emit:
ir.EmitVertex();
break;
default:
UNREACHABLE();
}
break;
}
case SendMsgSimm::Message::GsDone: {
break;
}
default:
UNREACHABLE();
}
}
} // namespace Shader::Gcn

View file

@ -10,6 +10,7 @@
#include "shader_recompiler/info.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
#define MAGIC_ENUM_RANGE_MIN 0
#define MAGIC_ENUM_RANGE_MAX 1515
@ -35,6 +36,7 @@ void Translator::EmitPrologue() {
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (info.stage) {
case Stage::Vertex:
case Stage::Export:
// v0: vertex ID, always present
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
// v1: instance ID, step rate 0
@ -76,6 +78,20 @@ void Translator::EmitPrologue() {
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
}
break;
case Stage::Geometry:
switch (runtime_info.gs_info.out_primitive[0]) {
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
[[fallthrough]];
case AmdGpu::GsOutputPrimitiveType::LineStrip:
ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
[[fallthrough]];
default:
ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
break;
}
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
default:
throw NotImplementedException("Unknown shader stage");
}
@ -359,7 +375,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("vs_{:#018x}_fetch.bin", info.pgm_hash);
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
}
@ -424,31 +440,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
}
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_BARRIER:
return S_BARRIER();
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
return;
case Opcode::S_GETPC_B64:
return S_GETPC_B64(pc, inst);
case Opcode::S_WAITCNT:
case Opcode::S_NOP:
case Opcode::S_ENDPGM:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1:
case Opcode::S_CBRANCH_VCCNZ:
case Opcode::S_CBRANCH_VCCZ:
case Opcode::S_CBRANCH_EXECNZ:
case Opcode::S_BRANCH:
return;
default:
UNREACHABLE();
}
}
void Translator::LogMissingOpcode(const GcnInst& inst) {
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
magic_enum::enum_name(inst.opcode), u32(inst.opcode),
@ -467,7 +458,7 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex);
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
translator.EmitFetch(inst);
continue;
}

View file

@ -116,6 +116,7 @@ public:
// SOPP
void S_BARRIER();
void S_SENDMSG(const GcnInst& inst);
// Scalar Memory
// SMRD

View file

@ -160,9 +160,19 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf;
const bool is_ring = mtbuf.glc && mtbuf.slc;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value soffset{GetSrc(inst.src[3])};
if (info.stage != Stage::Geometry) {
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
"Non immediate offset not supported");
}
const IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
}
if (mtbuf.idxen && mtbuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
@ -171,13 +181,12 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
}
return {};
}();
const IR::Value soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
IR::BufferInstInfo info{};
info.index_enable.Assign(mtbuf.idxen);
info.offset_enable.Assign(mtbuf.offen);
info.inst_offset.Assign(mtbuf.offset);
IR::BufferInstInfo buffer_info{};
buffer_info.index_enable.Assign(mtbuf.idxen);
buffer_info.offset_enable.Assign(mtbuf.offen);
buffer_info.inst_offset.Assign(mtbuf.offset);
buffer_info.ring_access.Assign(is_ring);
if (is_typed) {
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
@ -190,7 +199,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::U32{value});
@ -230,9 +239,20 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf;
const bool is_ring = mtbuf.glc && mtbuf.slc;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value address = [&] -> IR::Value {
const IR::Value soffset{GetSrc(inst.src[3])};
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
"Non immediate offset not supported");
}
IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
}
if (mtbuf.idxen && mtbuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
@ -241,13 +261,12 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
}
return {};
}();
const IR::Value soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
IR::BufferInstInfo info{};
info.index_enable.Assign(mtbuf.idxen);
info.offset_enable.Assign(mtbuf.offen);
info.inst_offset.Assign(mtbuf.offset);
IR::BufferInstInfo buffer_info{};
buffer_info.index_enable.Assign(mtbuf.idxen);
buffer_info.offset_enable.Assign(mtbuf.offen);
buffer_info.inst_offset.Assign(mtbuf.offset);
buffer_info.ring_access.Assign(is_ring);
if (is_typed) {
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
@ -278,7 +297,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, info);
ir.StoreBuffer(num_dwords, handle, address, value, buffer_info);
}
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {

View file

@ -225,8 +225,9 @@ struct Info {
}
void AddBindings(Backend::Bindings& bnd) const {
bnd.buffer += buffers.size() + texture_buffers.size();
bnd.unified += bnd.buffer + images.size() + samplers.size();
const auto total_buffers = buffers.size() + texture_buffers.size();
bnd.buffer += total_buffers;
bnd.unified += total_buffers + images.size() + samplers.size();
bnd.user_data += ud_mask.NumRegs();
}

View file

@ -6,14 +6,6 @@
namespace Shader::IR {
bool IsParam(Attribute attribute) noexcept {
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
}
bool IsMrt(Attribute attribute) noexcept {
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
}
std::string NameOf(Attribute attribute) {
switch (attribute) {
case Attribute::RenderTarget0:

View file

@ -81,9 +81,17 @@ constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
constexpr bool IsPosition(Attribute attribute) noexcept {
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
}
[[nodiscard]] bool IsMrt(Attribute attribute) noexcept;
constexpr bool IsParam(Attribute attribute) noexcept {
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
}
constexpr bool IsMrt(Attribute attribute) noexcept {
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
}
[[nodiscard]] std::string NameOf(Attribute attribute);

View file

@ -249,8 +249,8 @@ void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
@ -1553,4 +1553,12 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color);
}
void IREmitter::EmitVertex() {
Inst(Opcode::EmitVertex);
}
void IREmitter::EmitPrimitive() {
Inst(Opcode::EmitPrimitive);
}
} // namespace Shader::IR

View file

@ -78,7 +78,7 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
@ -310,6 +310,9 @@ public:
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
TextureInstInfo info);
void EmitVertex();
void EmitPrimitive();
private:
IR::Block::iterator insertion_point;

View file

@ -89,6 +89,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::ImageAtomicOr32:
case Opcode::ImageAtomicXor32:
case Opcode::ImageAtomicExchange32:
case Opcode::EmitVertex:
case Opcode::EmitPrimitive:
return true;
default:
return false;

View file

@ -24,6 +24,10 @@ OPCODE(Barrier, Void,
OPCODE(WorkgroupMemoryBarrier, Void, )
OPCODE(DeviceMemoryBarrier, Void, )
// Geometry shader specific
OPCODE(EmitVertex, Void, )
OPCODE(EmitPrimitive, Void, )
// Shared memory operations
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
@ -49,7 +53,7 @@ OPCODE(GetVectorRegister, U32, Vect
OPCODE(SetVectorRegister, Void, VectorReg, U32, )
OPCODE(GetGotoVariable, U1, U32, )
OPCODE(SetGotoVariable, Void, U32, U1, )
OPCODE(GetAttribute, F32, Attribute, U32, )
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
OPCODE(GetAttributeU32, U32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )

View file

@ -15,5 +15,7 @@ void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
} // namespace Shader::Optimization

View file

@ -0,0 +1,110 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/recompiler.h"
namespace Shader::Optimization {
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage) {
const auto& ForEachInstruction = [&](auto func) {
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
func(ir, inst);
}
}
};
switch (stage) {
case Stage::Export: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::StoreBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
break;
}
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
ASSERT(offset < runtime_info.es_info.vertex_data_size * 4);
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
const auto attrib =
IR::Value{offset < 16 ? IR::Attribute::Position0
: IR::Attribute::Param0 + (offset / 16 - 1)};
const auto comp = (offset / 4) % 4;
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
inst.ClearArgs();
inst.SetArg(0, attrib);
inst.SetArg(1, data);
inst.SetArg(2, ir.Imm32(comp));
break;
}
default:
break;
}
});
break;
}
case Stage::Geometry: {
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::LoadBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
break;
}
const auto shl_inst = inst.Arg(1).TryInstRecursive();
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
const auto bucket = offset.Resolve().U32() / 256u;
const auto attrib = bucket < 4 ? IR::Attribute::Position0
: IR::Attribute::Param0 + (bucket / 4 - 1);
const auto comp = bucket % 4;
auto attr_value = ir.GetAttribute(attrib, comp, vertex_id);
inst.ReplaceOpcode(IR::Opcode::BitCastU32F32);
inst.ClearArgs();
inst.SetArg(0, attr_value);
break;
}
case IR::Opcode::StoreBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
break;
}
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u;
const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size;
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
const auto& it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs);
ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend());
const auto& [attr, comp] = it->second;
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
inst.ClearArgs();
inst.SetArg(0, IR::Value{attr});
inst.SetArg(1, data);
inst.SetArg(2, ir.Imm32(comp));
break;
}
default:
break;
}
});
break;
}
default:
break;
}
}
} // namespace Shader::Optimization

View file

@ -7,7 +7,6 @@
#include "common/bit_field.h"
#include "common/enum.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader::IR {
@ -67,6 +66,7 @@ union BufferInstInfo {
BitField<0, 1, u32> index_enable;
BitField<1, 1, u32> offset_enable;
BitField<2, 12, u32> inst_offset;
BitField<14, 1, u32> ring_access; // global + system coherency
};
enum class ScalarReg : u32 {

View file

@ -63,6 +63,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
if (program.info.stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);

View file

@ -8,6 +8,8 @@
#include "common/assert.h"
#include "common/types.h"
#include "frontend/copy_shader.h"
#include "video_core/amdgpu/types.h"
namespace Shader {
@ -26,13 +28,11 @@ constexpr u32 MaxStageTypes = 6;
return static_cast<Stage>(index);
}
enum class MrtSwizzle : u8 {
Identity = 0,
Alt = 1,
Reverse = 2,
ReverseAlt = 3,
struct ExportRuntimeInfo {
u32 vertex_data_size;
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
};
static constexpr u32 MaxColorBuffers = 8;
enum class VsOutput : u8 {
None,
@ -70,6 +70,33 @@ struct VertexRuntimeInfo {
}
};
static constexpr auto GsMaxOutputStreams = 4u;
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
struct GeometryRuntimeInfo {
u32 num_invocations{};
u32 output_vertices{};
u32 in_vertex_data_size{};
u32 out_vertex_data_size{};
AmdGpu::PrimitiveType in_primitive;
GsOutputPrimTypes out_primitive;
CopyShaderData copy_data;
bool operator==(const GeometryRuntimeInfo& other) const noexcept {
return num_invocations && other.num_invocations &&
output_vertices == other.output_vertices && in_primitive == other.in_primitive &&
std::ranges::equal(out_primitive, other.out_primitive) &&
std::ranges::equal(copy_data.attr_map, other.copy_data.attr_map);
}
};
enum class MrtSwizzle : u8 {
Identity = 0,
Alt = 1,
Reverse = 2,
ReverseAlt = 3,
};
static constexpr u32 MaxColorBuffers = 8;
struct FragmentRuntimeInfo {
struct PsInput {
u8 param_index;
@ -114,7 +141,9 @@ struct RuntimeInfo {
u32 num_user_data;
u32 num_input_vgprs;
u32 num_allocated_vgprs;
ExportRuntimeInfo es_info;
VertexRuntimeInfo vs_info;
GeometryRuntimeInfo gs_info;
FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info;
@ -128,6 +157,10 @@ struct RuntimeInfo {
return vs_info == other.vs_info;
case Stage::Compute:
return cs_info == other.cs_info;
case Stage::Export:
return es_info == other.es_info;
case Stage::Geometry:
return gs_info == other.gs_info;
default:
return true;
}