mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-25 21:03:18 +00:00
Initial support of Geometry shaders (#1244)
* video_core: initial GS support * fix for components mapping; missing prim type
This commit is contained in:
parent
5bb45dc7ba
commit
927bb0c175
40 changed files with 944 additions and 268 deletions
|
@ -6,14 +6,6 @@
|
|||
|
||||
namespace Shader::IR {
|
||||
|
||||
// Reports whether `attribute` falls inside the inclusive Param0..Param31 range.
bool IsParam(Attribute attribute) noexcept {
    if (attribute < Attribute::Param0) {
        return false;
    }
    return attribute <= Attribute::Param31;
}
|
||||
|
||||
// Reports whether `attribute` falls inside the inclusive RenderTarget0..RenderTarget7 range.
bool IsMrt(Attribute attribute) noexcept {
    if (attribute < Attribute::RenderTarget0) {
        return false;
    }
    return attribute <= Attribute::RenderTarget7;
}
|
||||
|
||||
std::string NameOf(Attribute attribute) {
|
||||
switch (attribute) {
|
||||
case Attribute::RenderTarget0:
|
||||
|
|
|
@ -81,9 +81,17 @@ constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
|
|||
constexpr size_t NumRenderTargets = 8;
|
||||
constexpr size_t NumParams = 32;
|
||||
|
||||
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
|
||||
// True when `attribute` is one of Position0..Position3 (both ends inclusive).
constexpr bool IsPosition(Attribute attribute) noexcept {
    return !(attribute < Attribute::Position0 || attribute > Attribute::Position3);
}
|
||||
|
||||
[[nodiscard]] bool IsMrt(Attribute attribute) noexcept;
|
||||
// True when `attribute` is one of Param0..Param31 (both ends inclusive).
constexpr bool IsParam(Attribute attribute) noexcept {
    return !(attribute < Attribute::Param0 || attribute > Attribute::Param31);
}
|
||||
|
||||
// True when `attribute` is one of RenderTarget0..RenderTarget7 (both ends inclusive).
constexpr bool IsMrt(Attribute attribute) noexcept {
    return !(attribute < Attribute::RenderTarget0 || attribute > Attribute::RenderTarget7);
}
|
||||
|
||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||
|
||||
|
|
|
@ -249,8 +249,8 @@ void IREmitter::SetM0(const U32& value) {
|
|||
Inst(Opcode::SetM0, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
|
||||
// Emits a GetAttribute instruction for component `comp` of `attribute`. `index` is passed
// through as a third immediate operand; the ring access elimination pass supplies a vertex id
// here when reading geometry shader inputs.
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
    const auto component_imm = Imm32(comp);
    const auto index_imm = Imm32(index);
    return Inst<F32>(Opcode::GetAttribute, attribute, component_imm, index_imm);
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||
|
@ -1553,4 +1553,12 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value
|
|||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color);
|
||||
}
|
||||
|
||||
// Appends an operand-less EmitVertex instruction at the current insertion point.
// The opcode is listed in Inst::MayHaveSideEffects, so the result is intentionally unused.
void IREmitter::EmitVertex() {
    Inst(Opcode::EmitVertex);
}
|
||||
|
||||
// Appends an operand-less EmitPrimitive instruction at the current insertion point.
// The opcode is listed in Inst::MayHaveSideEffects, so the result is intentionally unused.
void IREmitter::EmitPrimitive() {
    Inst(Opcode::EmitPrimitive);
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -78,7 +78,7 @@ public:
|
|||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
|
@ -310,6 +310,9 @@ public:
|
|||
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||
TextureInstInfo info);
|
||||
|
||||
void EmitVertex();
|
||||
void EmitPrimitive();
|
||||
|
||||
private:
|
||||
IR::Block::iterator insertion_point;
|
||||
|
||||
|
|
|
@ -89,6 +89,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::ImageAtomicOr32:
|
||||
case Opcode::ImageAtomicXor32:
|
||||
case Opcode::ImageAtomicExchange32:
|
||||
case Opcode::EmitVertex:
|
||||
case Opcode::EmitPrimitive:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -24,6 +24,10 @@ OPCODE(Barrier, Void,
|
|||
OPCODE(WorkgroupMemoryBarrier, Void, )
|
||||
OPCODE(DeviceMemoryBarrier, Void, )
|
||||
|
||||
// Geometry shader specific
|
||||
OPCODE(EmitVertex, Void, )
|
||||
OPCODE(EmitPrimitive, Void, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
|
@ -49,7 +53,7 @@ OPCODE(GetVectorRegister, U32, Vect
|
|||
OPCODE(SetVectorRegister, Void, VectorReg, U32, )
|
||||
OPCODE(GetGotoVariable, U1, U32, )
|
||||
OPCODE(SetGotoVariable, Void, U32, U1, )
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, )
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
||||
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
|
||||
|
|
|
@ -15,5 +15,7 @@ void ConstantPropagationPass(IR::BlockList& program);
|
|||
void ResourceTrackingPass(IR::Program& program);
|
||||
void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||
Stage stage);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
110
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
Normal file
110
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
Normal file
|
@ -0,0 +1,110 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
// Replaces buffer instructions flagged with `ring_access` by attribute accesses.
//
//  - Stage::Export:   ring StoreBufferU32 -> SetAttribute. The target attribute and component
//                     are derived from the instruction's immediate byte offset.
//  - Stage::Geometry: ring LoadBufferU32  -> GetAttribute + BitCastU32F32 (per-vertex input),
//                     ring StoreBufferU32 -> SetAttribute, resolved through
//                     `runtime_info.gs_info.copy_data.attr_map`.
//  - Any other stage: no changes.
//
// `program` is taken by const reference, but the instructions reachable through its block
// pointers are rewritten in place.
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
                           Stage stage) {
    // Calls `func(ir, inst)` for every instruction of every block; `ir` is an emitter whose
    // insertion point is the visited instruction.
    const auto ForEachInstruction = [&](auto func) {
        for (IR::Block* block : program.blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                func(ir, inst);
            }
        }
    };

    switch (stage) {
    case Stage::Export: {
        // Capture by reference: the callback only runs synchronously inside this function, and
        // a by-copy capture would duplicate the whole RuntimeInfo just to read one field.
        ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
            if (inst.GetOpcode() != IR::Opcode::StoreBufferU32) {
                return;
            }
            if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
                return;
            }

            const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
            ASSERT(offset < runtime_info.es_info.vertex_data_size * 4);
            const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
            // Each attribute occupies 16 bytes (4 dwords): the first slot is the position,
            // every following slot maps to consecutive Param attributes.
            const auto attrib =
                IR::Value{offset < 16 ? IR::Attribute::Position0
                                      : IR::Attribute::Param0 + (offset / 16 - 1)};
            const auto comp = (offset / 4) % 4;

            inst.ReplaceOpcode(IR::Opcode::SetAttribute);
            inst.ClearArgs();
            inst.SetArg(0, attrib);
            inst.SetArg(1, data);
            inst.SetArg(2, ir.Imm32(comp));
        });
        break;
    }
    case Stage::Geometry: {
        ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
            switch (inst.GetOpcode()) {
            case IR::Opcode::LoadBufferU32: {
                if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
                    break;
                }

                // The address operand must be produced by an instruction; its operands carry
                // the vertex id and the attribute offset.
                // NOTE(review): the name suggests a shift-left is expected here, but the
                // opcode is not verified - confirm against the translator.
                const auto shl_inst = inst.Arg(1).TryInstRecursive();
                ASSERT(shl_inst != nullptr);
                const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
                const auto offset = shl_inst->Arg(1);
                // One component per 256-byte bucket, four buckets per attribute.
                const auto bucket = offset.Resolve().U32() / 256u;
                const auto attrib = bucket < 4 ? IR::Attribute::Position0
                                               : IR::Attribute::Param0 + (bucket / 4 - 1);
                const auto comp = bucket % 4;

                auto attr_value = ir.GetAttribute(attrib, comp, vertex_id);
                // The load produced a U32, so keep the result type by bit-casting the F32
                // attribute value back.
                inst.ReplaceOpcode(IR::Opcode::BitCastU32F32);
                inst.ClearArgs();
                inst.SetArg(0, attr_value);
                break;
            }
            case IR::Opcode::StoreBufferU32: {
                if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
                    break;
                }

                const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
                const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
                const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u;
                const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size;
                // Both values are used as divisors below; a zero would be undefined behavior.
                ASSERT(output_size != 0);

                // Round the store offset down to its component slot, wrap it to one output
                // vertex and convert it to the key used by the copy shader attribute map.
                const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
                const auto it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs);
                ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend());
                const auto& [attr, comp] = it->second;

                inst.ReplaceOpcode(IR::Opcode::SetAttribute);
                inst.ClearArgs();
                inst.SetArg(0, IR::Value{attr});
                inst.SetArg(1, data);
                inst.SetArg(2, ir.Imm32(comp));
                break;
            }
            default:
                break;
            }
        });
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -7,7 +7,6 @@
|
|||
#include "common/bit_field.h"
|
||||
#include "common/enum.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -67,6 +66,7 @@ union BufferInstInfo {
|
|||
BitField<0, 1, u32> index_enable;
|
||||
BitField<1, 1, u32> offset_enable;
|
||||
BitField<2, 12, u32> inst_offset;
|
||||
BitField<14, 1, u32> ring_access; // global + system coherency
|
||||
};
|
||||
|
||||
enum class ScalarReg : u32 {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue