mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-18 09:24:58 +00:00
Initial support of Geometry shaders (#1244)
* video_core: initial GS support * fix for components mapping; missing prim type
This commit is contained in:
parent
5bb45dc7ba
commit
927bb0c175
40 changed files with 944 additions and 268 deletions
65
src/shader_recompiler/frontend/copy_shader.cpp
Normal file
65
src/shader_recompiler/frontend/copy_shader.cpp
Normal file
|
@ -0,0 +1,65 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/copy_shader.h"
|
||||
#include "shader_recompiler/frontend/decode.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
CopyShaderData ParseCopyShader(const std::span<const u32>& code) {
|
||||
Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()};
|
||||
Gcn::GcnDecodeContext decoder;
|
||||
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
|
||||
std::array<s32, 32> offsets{};
|
||||
std::fill(offsets.begin(), offsets.end(), -1);
|
||||
|
||||
CopyShaderData data{};
|
||||
Gcn::OperandField sgpr{};
|
||||
auto last_attr{IR::Attribute::Position0};
|
||||
s32 soffset{0};
|
||||
while (!code_slice.atEnd()) {
|
||||
auto inst = decoder.decodeInstruction(code_slice);
|
||||
switch (inst.opcode) {
|
||||
case Gcn::Opcode::S_MOVK_I32: {
|
||||
sgpr = inst.dst[0].field;
|
||||
soffset = inst.control.sopk.simm;
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::EXP: {
|
||||
const auto& exp = inst.control.exp;
|
||||
const IR::Attribute semantic = static_cast<IR::Attribute>(exp.target);
|
||||
for (int i = 0; i < inst.src_count; ++i) {
|
||||
const auto ofs = offsets[inst.src[i].code];
|
||||
if (ofs != -1) {
|
||||
data.attr_map[ofs] = {semantic, i};
|
||||
if (semantic > last_attr) {
|
||||
last_attr = semantic;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::BUFFER_LOAD_DWORD: {
|
||||
offsets[inst.src[1].code] = inst.control.mubuf.offset;
|
||||
if (inst.src[3].field != Gcn::OperandField::ConstZero) {
|
||||
ASSERT(inst.src[3].field == sgpr);
|
||||
offsets[inst.src[1].code] += soffset;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_attr != IR::Attribute::Position0) {
|
||||
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
} // namespace Shader
|
21
src/shader_recompiler/frontend/copy_shader.h
Normal file
21
src/shader_recompiler/frontend/copy_shader.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
struct CopyShaderData {
|
||||
std::unordered_map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
|
||||
u32 num_attrs{0};
|
||||
};
|
||||
|
||||
CopyShaderData ParseCopyShader(const std::span<const u32>& code);
|
||||
|
||||
} // namespace Shader
|
|
@ -2491,4 +2491,23 @@ enum class ImageAddrComponent : u32 {
|
|||
Clamp,
|
||||
};
|
||||
|
||||
struct SendMsgSimm {
|
||||
enum class Message : u32 {
|
||||
Interrupt = 1,
|
||||
Gs = 2,
|
||||
GsDone = 3,
|
||||
System = 15,
|
||||
};
|
||||
|
||||
enum class GsOp : u32 {
|
||||
Nop = 0,
|
||||
Cut = 1,
|
||||
Emit = 2,
|
||||
EmitCut = 3,
|
||||
};
|
||||
|
||||
Message msg : 4;
|
||||
GsOp op : 2;
|
||||
};
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -55,12 +55,6 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// SOPP
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
||||
// VOP2
|
||||
|
||||
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||
|
|
|
@ -540,14 +540,6 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
const IR::ScalarReg dst{inst.dst[0].code};
|
||||
ir.SetScalarReg(dst, ir.Imm32(pc));
|
||||
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||
}
|
||||
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
|
|
75
src/shader_recompiler/frontend/translate/scalar_flow.cpp
Normal file
75
src/shader_recompiler/frontend/translate/scalar_flow.cpp
Normal file
|
@ -0,0 +1,75 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/opcodes.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_BARRIER:
|
||||
return S_BARRIER();
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
return;
|
||||
case Opcode::S_GETPC_B64:
|
||||
return S_GETPC_B64(pc, inst);
|
||||
case Opcode::S_WAITCNT:
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_ENDPGM:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_CBRANCH_EXECNZ:
|
||||
case Opcode::S_BRANCH:
|
||||
return;
|
||||
case Opcode::S_SENDMSG:
|
||||
S_SENDMSG(inst);
|
||||
return;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
const IR::ScalarReg dst{inst.dst[0].code};
|
||||
ir.SetScalarReg(dst, ir.Imm32(pc));
|
||||
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||
}
|
||||
|
||||
void Translator::S_SENDMSG(const GcnInst& inst) {
|
||||
const auto& simm = reinterpret_cast<const SendMsgSimm&>(inst.control.sopp.simm);
|
||||
switch (simm.msg) {
|
||||
case SendMsgSimm::Message::Gs: {
|
||||
switch (simm.op) {
|
||||
case SendMsgSimm::GsOp::Nop:
|
||||
break;
|
||||
case SendMsgSimm::GsOp::Cut:
|
||||
ir.EmitPrimitive();
|
||||
break;
|
||||
case SendMsgSimm::GsOp::Emit:
|
||||
ir.EmitVertex();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SendMsgSimm::Message::GsDone: {
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
|
@ -10,6 +10,7 @@
|
|||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
#define MAGIC_ENUM_RANGE_MIN 0
|
||||
#define MAGIC_ENUM_RANGE_MAX 1515
|
||||
|
@ -35,6 +36,7 @@ void Translator::EmitPrologue() {
|
|||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||
switch (info.stage) {
|
||||
case Stage::Vertex:
|
||||
case Stage::Export:
|
||||
// v0: vertex ID, always present
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||
// v1: instance ID, step rate 0
|
||||
|
@ -76,6 +78,20 @@ void Translator::EmitPrologue() {
|
|||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
switch (runtime_info.gs_info.out_primitive[0]) {
|
||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
||||
[[fallthrough]];
|
||||
case AmdGpu::GsOutputPrimitiveType::LineStrip:
|
||||
ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
|
||||
[[fallthrough]];
|
||||
default:
|
||||
ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
|
||||
break;
|
||||
}
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Unknown shader stage");
|
||||
}
|
||||
|
@ -359,7 +375,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
if (!std::filesystem::exists(dump_dir)) {
|
||||
std::filesystem::create_directories(dump_dir);
|
||||
}
|
||||
const auto filename = fmt::format("vs_{:#018x}_fetch.bin", info.pgm_hash);
|
||||
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteRaw<u8>(code, fetch_size);
|
||||
}
|
||||
|
@ -424,31 +440,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_BARRIER:
|
||||
return S_BARRIER();
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
return;
|
||||
case Opcode::S_GETPC_B64:
|
||||
return S_GETPC_B64(pc, inst);
|
||||
case Opcode::S_WAITCNT:
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_ENDPGM:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_CBRANCH_EXECNZ:
|
||||
case Opcode::S_BRANCH:
|
||||
return;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::LogMissingOpcode(const GcnInst& inst) {
|
||||
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
|
||||
magic_enum::enum_name(inst.opcode), u32(inst.opcode),
|
||||
|
@ -467,7 +458,7 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
|
|||
|
||||
// Special case for emitting fetch shader.
|
||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||
ASSERT(info.stage == Stage::Vertex);
|
||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
|
||||
translator.EmitFetch(inst);
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -116,6 +116,7 @@ public:
|
|||
|
||||
// SOPP
|
||||
void S_BARRIER();
|
||||
void S_SENDMSG(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
// SMRD
|
||||
|
|
|
@ -160,9 +160,19 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
|
||||
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const bool is_ring = mtbuf.glc && mtbuf.slc;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
if (info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
if (is_ring) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
}
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
|
@ -171,13 +181,12 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
IR::BufferInstInfo buffer_info{};
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
|
@ -190,7 +199,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::U32{value});
|
||||
|
@ -230,9 +239,20 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
|
|||
|
||||
void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const bool is_ring = mtbuf.glc && mtbuf.slc;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
|
||||
IR::Value address = [&] -> IR::Value {
|
||||
if (is_ring) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
}
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
|
@ -241,13 +261,12 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
IR::BufferInstInfo buffer_info{};
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
|
@ -278,7 +297,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, buffer_info);
|
||||
}
|
||||
|
||||
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue