video_core: Bringup some basic functionality (#145)

* video_core: Remove hack in rasterizer

* The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers

* shader_recompiler: Implement attribute loads/stores

* video_core: Add basic vertex, index buffer handling and pipeline caching

* externals: Make xxhash lowercase
This commit is contained in:
TheTurtle 2024-05-25 15:33:15 +03:00 committed by GitHub
parent e9f64bb76c
commit 3c90b8ac00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1030 additions and 383 deletions

View file

@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.stage) {
switch (program.info.stage) {
case Stage::Compute: {
// const std::array<u32, 3> workgroup_size{program.workgroup_size};
// execution_model = spv::ExecutionModel::GLCompute;
@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
// }
break;
default:
throw NotImplementedException("Stage {}", u32(program.stage));
throw NotImplementedException("Stage {}", u32(program.info.stage));
}
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
@ -222,7 +222,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Binding
EmitContext ctx{profile, program, bindings};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
if (program.stage == Stage::Vertex) {
if (program.info.stage == Stage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}

View file

@ -10,12 +10,11 @@ namespace {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index).at(element)};
const auto& info{ctx.output_params.at(index)};
if (info.num_components == 1) {
return info.id;
} else {
const u32 index_element{element - info.first_element};
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element));
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
}
}
switch (attr) {
@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::
throw LogicError("Unreachable instruction");
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const u32 element{static_cast<u32>(attr) % 4};
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(element == 3 ? 1.0f : 0.0f);
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))};
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
}
throw NotImplementedException("Read attribute {}", attr);
}
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
switch (attr) {
case IR::Attribute::VertexId:
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Param0) {
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, value);
}

View file

@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu
const IR::Value& offset);
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);

View file

@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} {
u32& uniform_binding{bindings.unified};
u32& storage_binding{bindings.unified};
u32& texture_binding{bindings.unified};
@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() {
u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f);
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32");
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
}
@ -107,26 +111,123 @@ void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineOutputs(program);
}
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
case AmdGpu::NumberFormat::Uint:
return ctx.U32[4];
case AmdGpu::NumberFormat::Sscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Uscaled:
return ctx.F32[4];
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
case AmdGpu::NumberFormat::Sint:
return {id, input_s32, S32[1], 4};
case AmdGpu::NumberFormat::Sscaled:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uscaled:
return {id, input_f32, F32[1], 4};
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
case 1:
return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
case 2:
return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
case 3:
return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
default:
UNREACHABLE();
}
}
void EmitContext::DefineInputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) {
case Stage::Vertex:
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
break;
case Stage::Fragment:
for (const auto& input : info.ps_inputs) {
if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
input_f32, F32[1]};
continue;
}
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
const u32 num_components = info.loads.NumComponents(param);
const Id type{F32[num_components]};
const Id id{DefineInput(type, input.semantic)};
if (input.is_flat) {
Decorate(id, spv::Decoration::Flat);
}
Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components};
interfaces.push_back(id);
}
default:
break;
}
}
void EmitContext::DefineOutputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) {
case Stage::Vertex:
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components};
interfaces.push_back(id);
}
break;
case Stage::Fragment:
frag_color[0] = DefineOutput(F32[4], 0);
Name(frag_color[0], fmt::format("frag_color{}", 0));
interfaces.push_back(frag_color[0]);
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
continue;
}
frag_color[i] = DefineOutput(F32[4], i);
Name(frag_color[i], fmt::format("frag_color{}", i));
interfaces.push_back(frag_color[i]);
}
break;
default:
break;

View file

@ -135,6 +135,9 @@ public:
Id u32_zero_value{};
Id f32_zero_value{};
Id input_u32{};
Id input_f32{};
Id input_s32{};
Id output_u32{};
Id output_f32{};
@ -145,25 +148,22 @@ public:
Id base_vertex{};
std::array<Id, 8> frag_color{};
struct InputParamInfo {
struct SpirvAttribute {
Id id;
Id pointer_type;
Id component_type;
u32 num_components;
};
std::array<InputParamInfo, 32> input_params{};
struct ParamElementInfo {
Id id{};
u32 first_element{};
u32 num_components{};
};
std::array<std::array<ParamElementInfo, 4>, 32> output_params{};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};
private:
void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program);
void DefineInputs(const IR::Program& program);
void DefineOutputs(const IR::Program& program);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};
} // namespace Shader::Backend::SPIRV

View file

@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/static_vector.hpp>
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"
namespace Shader::Gcn {
/**
* s_load_dwordx4 s[8:11], s[2:3], 0x00
* s_load_dwordx4 s[12:15], s[2:3], 0x04
* s_load_dwordx4 s[16:19], s[2:3], 0x08
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen
* buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen
* buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen
* s_waitcnt 0
* s_setpc_b64 s[0:1]
* s_load_dwordx4 s[4:7], s[2:3], 0x0
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen
* s_load_dwordx4 s[4:7], s[2:3], 0x8
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen
* s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
* s_setpc_b64 s[0:1]
* A normal fetch shader looks like the above, the instructions are generated
* using input semantics on cpu side. Load instructions can either be separate or interleaved
* We take the reverse way, extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
struct VsharpLoad {
u32 dword_offset{};
s32 base_sgpr{};
s32 dst_reg{-1};
};
boost::container::static_vector<VsharpLoad, 16> loads;
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
if (inst.opcode == Opcode::S_SETPC_B64) {
break;
}
if (inst.inst_class == InstClass::ScalarMemRd) {
loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code);
continue;
}
if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SPGR.
// This is so we can determine its index in the vertex table.
const auto it = std::ranges::find_if(
loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++;
attrib.dest_vgpr = inst.src[1].code;
attrib.num_elements = inst.control.mubuf.count;
attrib.sgpr_base = it->base_sgpr;
attrib.dword_offset = it->dword_offset;
// Mark load as used.
it->dst_reg = -1;
}
}
return attributes;
}
} // namespace Shader::Gcn

View file

@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/types.h"
namespace Shader::Gcn {
struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
} // namespace Shader::Gcn

View file

@ -600,9 +600,9 @@ public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Stage stage_)
Info& info_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} {
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block};
@ -633,8 +633,7 @@ private:
ensure_block();
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stage, inst_list.subspan(start, size));
fmt::print("{}\n", IR::DumpBlock(*current_block));
Translate(current_block, inst_list.subspan(start, size), info);
break;
}
case StatementType::SetVariable: {
@ -812,17 +811,17 @@ private:
IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{};
std::span<const GcnInst> inst_list;
Stage stage;
Info& info;
};
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Stage stage) {
CFG& cfg, Info& info) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage};
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
return syntax_list;
}

View file

@ -10,13 +10,13 @@
#include "shader_recompiler/object_pool.h"
namespace Shader {
enum class Stage : u32;
struct Info;
}
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
Stage stage);
Info& info);
} // namespace Shader::Gcn

View file

@ -2,14 +2,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Gcn {
Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} {
Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} {
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (stage) {
switch (info.stage) {
case Stage::Vertex:
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
@ -92,11 +94,39 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
}
}
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list) {
void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code;
const u32* code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
}
// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
});
}
}
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
}
Translator translator{block, stage};
Translator translator{block, info};
for (const auto& inst : inst_list) {
switch (inst.opcode) {
case Opcode::S_MOV_B32:
@ -115,6 +145,9 @@ void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list
translator.V_MUL_F32(inst);
break;
case Opcode::S_SWAPPC_B64:
ASSERT(info.stage == Stage::Vertex);
translator.EmitFetch(inst);
break;
case Opcode::S_WAITCNT:
break; // Ignore for now.
case Opcode::S_BUFFER_LOAD_DWORDX16:

View file

@ -7,9 +7,10 @@
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader {
enum class Stage : u32;
struct Info;
}
namespace Shader::Gcn {
@ -25,7 +26,9 @@ enum class ConditionOp : u32 {
class Translator {
public:
explicit Translator(IR::Block* block_, Stage stage);
explicit Translator(IR::Block* block_, Info& info);
void EmitFetch(const GcnInst& inst);
// Scalar ALU
void S_MOV(const GcnInst& inst);
@ -66,8 +69,9 @@ private:
private:
IR::Block* block;
IR::IREmitter ir;
Info& info;
};
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list);
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn

View file

@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) {
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])),
ir.FPConvert(16, GetSrc(inst.src[1])));
ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32));
const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
}
void Translator::V_MUL_F32(const GcnInst& inst) {

View file

@ -7,7 +7,9 @@ namespace Shader::Gcn {
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr};
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
attr.semantic = inst.control.vintrp.attr;
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}

View file

@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) {
return "Param31";
case Attribute::VertexId:
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::FragCoord:
return "FragCoord";
default:
break;
}

View file

@ -72,10 +72,12 @@ enum class Attribute : u64 {
LocalInvocationId = 75,
LocalInvocationIndex = 76,
FragCoord = 77,
Max,
};
constexpr size_t EXP_NUM_POS = 4;
constexpr size_t EXP_NUM_PARAM = 32;
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32;
if (result > static_cast<int>(Attribute::Param31)) {
throw LogicError("Overflow on register arithmetic");
}
if (result < static_cast<int>(Attribute::Param0)) {
if (result < static_cast<int>(Attribute::RenderTarget0)) {
throw LogicError("Underflow on register arithmetic");
}
return static_cast<Attribute>(result);

View file

@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) {
Inst(Opcode::SetVcc, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute) {
return GetAttribute(attribute, 0);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
return GetAttributeU32(attribute, 0);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
return Inst<U32>(Opcode::GetAttributeU32, attribute, Imm32(comp));
}

View file

@ -58,11 +58,9 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp);
void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);

View file

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32: {
info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
break;
}
case IR::Opcode::SetAttribute: {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
default:
break;
}
}
void CollectShaderInfoPass(IR::Program& program) {
Info& info{program.info};
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
Visit(info, inst);
}
}
}
} // namespace Shader::Optimization

View file

@ -4,6 +4,7 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
@ -11,6 +12,7 @@ void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::BlockList& program);
void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::BlockList& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
} // namespace Shader::Optimization

View file

@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) {
};
}
void ResourceTrackingPass(IR::BlockList& program) {
for (IR::Block* const block : program) {
void ResourceTrackingPass(IR::Program& program) {
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (!IsResourceInstruction(inst)) {
continue;
}
printf("ff\n");
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto loc = TrackSharp(producer->Arg(0));
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,

View file

@ -3,15 +3,11 @@
#pragma once
#include <array>
#include <string>
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader {
enum class Stage : u32;
}
#include "shader_recompiler/runtime_info.h"
namespace Shader::IR {
@ -20,7 +16,7 @@ struct Program {
BlockList blocks;
BlockList post_order_blocks;
std::vector<Gcn::GcnInst> ins_list;
Stage stage;
Info info;
};
[[nodiscard]] std::string DumpProgram(const Program& program);

View file

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
@ -30,9 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks;
}
std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32> token) {
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> token, const Info&& info) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -40,6 +38,11 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
Gcn::GcnDecodeContext decoder;
static int counter = 0;
std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary);
file.write((const char*)token.data(), token.size_bytes());
file.close();
// Decode and save instructions
IR::Program program;
program.ins_list.reserve(token.size());
@ -52,21 +55,24 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage);
program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
program.stage = stage;
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);
// Shader::Optimization::ResourceTrackingPass(program.post_order_blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
Shader::Optimization::CollectShaderInfoPass(program);
// TODO: Pass profile from vulkan backend
const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program);
return code;
for (const auto& block : program.blocks) {
fmt::print("{}\n", IR::DumpBlock(*block));
}
return program;
}
} // namespace Shader

View file

@ -3,7 +3,9 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
@ -26,8 +28,8 @@ struct BinaryInfo {
u32 crc32;
};
[[nodiscard]] std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32> code);
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, const Info&& info);
} // namespace Shader

View file

@ -3,39 +3,16 @@
#pragma once
#include <array>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/type.h"
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/ir/attribute.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader {
enum class AttributeType : u8 {
Float,
SignedInt,
UnsignedInt,
SignedScaled,
UnsignedScaled,
Disabled,
};
enum class InputTopology {
Points,
Lines,
LinesAdjacency,
Triangles,
TrianglesAdjacency,
};
enum class CompareFunction {
Never,
Less,
Equal,
LessThanEqual,
Greater,
NotEqual,
GreaterThanEqual,
Always,
};
static constexpr size_t NumUserDataRegs = 16;
enum class Stage : u32 {
Vertex,
@ -62,78 +39,64 @@ enum class TextureType : u32 {
};
constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class Interpolation {
Smooth,
Flat,
NoPerspective,
};
struct ConstantBufferDescriptor {
u32 index;
u32 count;
auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct TextureDescriptor {
TextureType type;
bool is_eud;
bool is_depth;
bool is_multisample;
bool is_storage;
u32 count;
u32 eud_offset_dwords;
u32 ud_index_dwords;
auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
struct Info {
bool uses_workgroup_id{};
bool uses_local_invocation_id{};
bool uses_invocation_id{};
bool uses_invocation_info{};
bool uses_sample_id{};
struct VsInput {
AmdGpu::NumberFormat fmt;
u16 binding;
u16 num_components;
u8 sgpr_base;
u8 dword_offset;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
std::array<Interpolation, 32> interpolation{};
// VaryingState loads;
// VaryingState stores;
// VaryingState passthrough;
struct PsInput {
u32 param_index;
u32 semantic;
bool is_default;
bool is_flat;
u32 default_value;
};
boost::container::static_vector<PsInput, 32> ps_inputs{};
std::array<bool, 8> stores_frag_color{};
bool stores_sample_mask{};
bool stores_frag_depth{};
struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[Index(attrib)] & (1 << comp);
}
bool uses_fp16{};
bool uses_fp64{};
bool uses_fp16_denorms_flush{};
bool uses_fp16_denorms_preserve{};
bool uses_fp32_denorms_flush{};
bool uses_fp32_denorms_preserve{};
bool uses_int8{};
bool uses_int16{};
bool uses_int64{};
bool uses_image_1d{};
bool uses_sampled_1d{};
bool uses_subgroup_vote{};
bool uses_subgroup_mask{};
bool uses_derivatives{};
bool GetAny(IR::Attribute attrib) const {
return flags[Index(attrib)];
}
IR::Type used_constant_buffer_types{};
IR::Type used_storage_buffer_types{};
IR::Type used_indirect_cbuf_types{};
void Set(IR::Attribute attrib, u32 comp = 0) {
flags[Index(attrib)] |= (1 << comp);
}
// std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
u32 used_clip_distances{};
u32 NumComponents(IR::Attribute attrib) const {
const u8 mask = flags[Index(attrib)];
ASSERT(mask != 0b1011 || mask != 0b1101);
return std::popcount(mask);
}
// boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
// constant_buffer_descriptors;
// boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS>
// storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors;
// ImageBufferDescriptors image_buffer_descriptors;
// TextureDescriptors texture_descriptors;
// ImageDescriptors image_descriptors;
static size_t Index(IR::Attribute attrib) {
return static_cast<size_t>(attrib);
}
std::array<u8, IR::NumAttributes> flags;
};
AttributeFlags loads{};
AttributeFlags stores{};
std::span<const u32> user_data;
Stage stage;
template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
T data;
u32* base;
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
std::memcpy(&data, base + dword_offset, sizeof(T));
return data;
}
};
} // namespace Shader