mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-19 09:54:54 +00:00
spirv: Flush denormals if possible (#1302)
This commit is contained in:
parent
56e8ed7833
commit
100036aecf
14 changed files with 130 additions and 98 deletions
|
@ -206,10 +206,7 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
|
||||||
return main;
|
return main;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
void SetupCapabilities(const Info& info, EmitContext& ctx) {
|
||||||
const auto& info = program.info;
|
|
||||||
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
|
||||||
spv::ExecutionModel execution_model{};
|
|
||||||
ctx.AddCapability(spv::Capability::Image1D);
|
ctx.AddCapability(spv::Capability::Image1D);
|
||||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||||
|
@ -247,6 +244,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||||
if (info.uses_group_ballot) {
|
if (info.uses_group_ballot) {
|
||||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||||
}
|
}
|
||||||
|
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
|
||||||
|
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||||
|
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||||
|
}
|
||||||
|
if (info.stage == Stage::Geometry) {
|
||||||
|
ctx.AddCapability(spv::Capability::Geometry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||||
|
const auto& info = program.info;
|
||||||
|
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
||||||
|
spv::ExecutionModel execution_model{};
|
||||||
switch (program.info.stage) {
|
switch (program.info.stage) {
|
||||||
case Stage::Compute: {
|
case Stage::Compute: {
|
||||||
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||||
|
@ -290,6 +300,24 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||||
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
|
Id main_func) {
|
||||||
|
ctx.AddExtension("SPV_KHR_float_controls");
|
||||||
|
const auto fp_denorm_mode = runtime_info.fp_denorm_mode32;
|
||||||
|
if (fp_denorm_mode == AmdGpu::FpDenormMode::InOutFlush) {
|
||||||
|
if (profile.support_fp32_denorm_flush) {
|
||||||
|
ctx.AddCapability(spv::Capability::DenormFlushToZero);
|
||||||
|
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Unknown FP denorm mode {}", u32(fp_denorm_mode));
|
||||||
|
}
|
||||||
|
const auto fp_round_mode = runtime_info.fp_round_mode32;
|
||||||
|
if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Unknown FP rounding mode {}", u32(fp_round_mode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
|
void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
|
||||||
auto inst{program.blocks.front()->begin()};
|
auto inst{program.blocks.front()->begin()};
|
||||||
size_t block_index{0};
|
size_t block_index{0};
|
||||||
|
@ -314,18 +342,8 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
|
||||||
EmitContext ctx{profile, runtime_info, program.info, binding};
|
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||||
const Id main{DefineMain(ctx, program)};
|
const Id main{DefineMain(ctx, program)};
|
||||||
DefineEntryPoint(program, ctx, main);
|
DefineEntryPoint(program, ctx, main);
|
||||||
switch (program.info.stage) {
|
SetupCapabilities(program.info, ctx);
|
||||||
case Stage::Export:
|
SetupFloatMode(ctx, profile, runtime_info, main);
|
||||||
case Stage::Vertex:
|
|
||||||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
|
||||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
|
||||||
break;
|
|
||||||
case Stage::Geometry:
|
|
||||||
ctx.AddCapability(spv::Capability::Geometry);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
PatchPhiNodes(program, ctx);
|
PatchPhiNodes(program, ctx);
|
||||||
binding.user_data += program.info.ud_mask.NumRegs();
|
binding.user_data += program.info.ud_mask.NumRegs();
|
||||||
return ctx.Assemble();
|
return ctx.Assemble();
|
||||||
|
|
|
@ -284,7 +284,8 @@ void EmitContext::DefineInputs() {
|
||||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||||
for (const auto& input : runtime_info.fs_info.inputs) {
|
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
||||||
|
const auto& input = runtime_info.fs_info.inputs[i];
|
||||||
const u32 semantic = input.param_index;
|
const u32 semantic = input.param_index;
|
||||||
ASSERT(semantic < IR::NumParams);
|
ASSERT(semantic < IR::NumParams);
|
||||||
if (input.is_default && !input.is_flat) {
|
if (input.is_default && !input.is_flat) {
|
||||||
|
@ -333,7 +334,6 @@ void EmitContext::DefineInputs() {
|
||||||
|
|
||||||
const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u;
|
const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u;
|
||||||
for (int param_id = 0; param_id < num_params; ++param_id) {
|
for (int param_id = 0; param_id < num_params; ++param_id) {
|
||||||
const IR::Attribute param{IR::Attribute::Param0 + param_id};
|
|
||||||
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
||||||
const Id id{DefineInput(type, param_id)};
|
const Id id{DefineInput(type, param_id)};
|
||||||
Name(id, fmt::format("in_attr{}", param_id));
|
Name(id, fmt::format("in_attr{}", param_id));
|
||||||
|
@ -394,8 +394,7 @@ void EmitContext::DefineOutputs() {
|
||||||
case Stage::Geometry: {
|
case Stage::Geometry: {
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||||
|
|
||||||
for (u32 attr_id = 0; attr_id < runtime_info.gs_info.copy_data.num_attrs; attr_id++) {
|
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
||||||
const IR::Attribute param{IR::Attribute::Param0 + attr_id};
|
|
||||||
const Id id{DefineOutput(F32[4], attr_id)};
|
const Id id{DefineOutput(F32[4], attr_id)};
|
||||||
Name(id, fmt::format("out_attr{}", attr_id));
|
Name(id, fmt::format("out_attr{}", attr_id));
|
||||||
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
CopyShaderData ParseCopyShader(const std::span<const u32>& code) {
|
CopyShaderData ParseCopyShader(std::span<const u32> code) {
|
||||||
Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()};
|
Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()};
|
||||||
Gcn::GcnDecodeContext decoder;
|
Gcn::GcnDecodeContext decoder;
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,6 @@ struct CopyShaderData {
|
||||||
u32 num_attrs{0};
|
u32 num_attrs{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
CopyShaderData ParseCopyShader(const std::span<const u32>& code);
|
CopyShaderData ParseCopyShader(std::span<const u32> code);
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
|
@ -3,12 +3,12 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <vector>
|
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/backend/bindings.h"
|
#include "shader_recompiler/backend/bindings.h"
|
||||||
|
#include "shader_recompiler/frontend/copy_shader.h"
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
#include "shader_recompiler/ir/type.h"
|
#include "shader_recompiler/ir/type.h"
|
||||||
|
@ -170,6 +170,8 @@ struct Info {
|
||||||
};
|
};
|
||||||
UserDataMask ud_mask{};
|
UserDataMask ud_mask{};
|
||||||
|
|
||||||
|
CopyShaderData gs_copy_data;
|
||||||
|
|
||||||
s8 vertex_offset_sgpr = -1;
|
s8 vertex_offset_sgpr = -1;
|
||||||
s8 instance_offset_sgpr = -1;
|
s8 instance_offset_sgpr = -1;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
|
@ -11,6 +11,8 @@ namespace Shader::Optimization {
|
||||||
|
|
||||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||||
Stage stage) {
|
Stage stage) {
|
||||||
|
auto& info = program.info;
|
||||||
|
|
||||||
const auto& ForEachInstruction = [&](auto func) {
|
const auto& ForEachInstruction = [&](auto func) {
|
||||||
for (IR::Block* block : program.blocks) {
|
for (IR::Block* block : program.blocks) {
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
|
@ -52,6 +54,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Stage::Geometry: {
|
case Stage::Geometry: {
|
||||||
|
const auto& gs_info = runtime_info.gs_info;
|
||||||
|
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
|
||||||
|
|
||||||
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
|
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
const auto opcode = inst.GetOpcode();
|
const auto opcode = inst.GetOpcode();
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
|
@ -81,12 +86,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||||
|
|
||||||
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
|
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
|
||||||
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
|
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
|
||||||
const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u;
|
const auto comp_ofs = gs_info.output_vertices * 4u;
|
||||||
const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size;
|
const auto output_size = comp_ofs * gs_info.out_vertex_data_size;
|
||||||
|
|
||||||
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
|
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
|
||||||
const auto& it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs);
|
const auto& it = info.gs_copy_data.attr_map.find(vc_read_ofs);
|
||||||
ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend());
|
ASSERT(it != info.gs_copy_data.attr_map.cend());
|
||||||
const auto& [attr, comp] = it->second;
|
const auto& [attr, comp] = it->second;
|
||||||
|
|
||||||
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
|
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
|
||||||
|
|
|
@ -10,20 +10,6 @@
|
||||||
|
|
||||||
namespace Shader::IR {
|
namespace Shader::IR {
|
||||||
|
|
||||||
enum class FpRoundMode : u32 {
|
|
||||||
NearestEven = 0,
|
|
||||||
PlusInf = 1,
|
|
||||||
MinInf = 2,
|
|
||||||
ToZero = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class FpDenormMode : u32 {
|
|
||||||
InOutFlush = 0,
|
|
||||||
InAllowOutFlush = 1,
|
|
||||||
InFlushOutAllow = 2,
|
|
||||||
InOutAllow = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class FloatClassFunc : u32 {
|
enum class FloatClassFunc : u32 {
|
||||||
SignalingNan = 1 << 0,
|
SignalingNan = 1 << 0,
|
||||||
QuietNan = 1 << 1,
|
QuietNan = 1 << 1,
|
||||||
|
@ -41,13 +27,6 @@ enum class FloatClassFunc : u32 {
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
|
DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
|
||||||
|
|
||||||
union Mode {
|
|
||||||
BitField<0, 4, FpRoundMode> fp_round;
|
|
||||||
BitField<4, 2, FpDenormMode> fp_denorm_single;
|
|
||||||
BitField<6, 2, FpDenormMode> fp_denorm_double;
|
|
||||||
BitField<8, 1, u32> dx10_clamp;
|
|
||||||
};
|
|
||||||
|
|
||||||
union TextureInstInfo {
|
union TextureInstInfo {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 1, u32> is_depth;
|
BitField<0, 1, u32> is_depth;
|
||||||
|
|
|
@ -19,13 +19,8 @@ struct Profile {
|
||||||
bool support_float_controls{};
|
bool support_float_controls{};
|
||||||
bool support_separate_denorm_behavior{};
|
bool support_separate_denorm_behavior{};
|
||||||
bool support_separate_rounding_mode{};
|
bool support_separate_rounding_mode{};
|
||||||
bool support_fp16_denorm_preserve{};
|
|
||||||
bool support_fp32_denorm_preserve{};
|
bool support_fp32_denorm_preserve{};
|
||||||
bool support_fp16_denorm_flush{};
|
|
||||||
bool support_fp32_denorm_flush{};
|
bool support_fp32_denorm_flush{};
|
||||||
bool support_fp16_signed_zero_nan_preserve{};
|
|
||||||
bool support_fp32_signed_zero_nan_preserve{};
|
|
||||||
bool support_fp64_signed_zero_nan_preserve{};
|
|
||||||
bool support_explicit_workgroup_layout{};
|
bool support_explicit_workgroup_layout{};
|
||||||
bool has_broken_spirv_clamp{};
|
bool has_broken_spirv_clamp{};
|
||||||
bool lower_left_origin_mode{};
|
bool lower_left_origin_mode{};
|
||||||
|
|
|
@ -4,11 +4,9 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <span>
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
|
|
||||||
#include "common/assert.h"
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "frontend/copy_shader.h"
|
|
||||||
#include "video_core/amdgpu/types.h"
|
#include "video_core/amdgpu/types.h"
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
@ -62,7 +60,8 @@ enum class VsOutput : u8 {
|
||||||
using VsOutputMap = std::array<VsOutput, 4>;
|
using VsOutputMap = std::array<VsOutput, 4>;
|
||||||
|
|
||||||
struct VertexRuntimeInfo {
|
struct VertexRuntimeInfo {
|
||||||
boost::container::static_vector<VsOutputMap, 3> outputs;
|
u32 num_outputs;
|
||||||
|
std::array<VsOutputMap, 3> outputs;
|
||||||
bool emulate_depth_negative_one_to_one{};
|
bool emulate_depth_negative_one_to_one{};
|
||||||
|
|
||||||
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||||
|
@ -79,13 +78,13 @@ struct GeometryRuntimeInfo {
|
||||||
u32 out_vertex_data_size{};
|
u32 out_vertex_data_size{};
|
||||||
AmdGpu::PrimitiveType in_primitive;
|
AmdGpu::PrimitiveType in_primitive;
|
||||||
GsOutputPrimTypes out_primitive;
|
GsOutputPrimTypes out_primitive;
|
||||||
CopyShaderData copy_data;
|
std::span<const u32> vs_copy;
|
||||||
|
u64 vs_copy_hash;
|
||||||
|
|
||||||
bool operator==(const GeometryRuntimeInfo& other) const noexcept {
|
bool operator==(const GeometryRuntimeInfo& other) const noexcept {
|
||||||
return num_invocations && other.num_invocations &&
|
return num_invocations && other.num_invocations &&
|
||||||
output_vertices == other.output_vertices && in_primitive == other.in_primitive &&
|
output_vertices == other.output_vertices && in_primitive == other.in_primitive &&
|
||||||
std::ranges::equal(out_primitive, other.out_primitive) &&
|
std::ranges::equal(out_primitive, other.out_primitive);
|
||||||
std::ranges::equal(copy_data.attr_map, other.copy_data.attr_map);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -106,7 +105,8 @@ struct FragmentRuntimeInfo {
|
||||||
|
|
||||||
auto operator<=>(const PsInput&) const noexcept = default;
|
auto operator<=>(const PsInput&) const noexcept = default;
|
||||||
};
|
};
|
||||||
boost::container::static_vector<PsInput, 32> inputs;
|
u32 num_inputs;
|
||||||
|
std::array<PsInput, 32> inputs;
|
||||||
struct PsColorBuffer {
|
struct PsColorBuffer {
|
||||||
AmdGpu::NumberFormat num_format;
|
AmdGpu::NumberFormat num_format;
|
||||||
MrtSwizzle mrt_swizzle;
|
MrtSwizzle mrt_swizzle;
|
||||||
|
@ -117,7 +117,9 @@ struct FragmentRuntimeInfo {
|
||||||
|
|
||||||
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||||
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
||||||
std::ranges::equal(inputs, other.inputs);
|
num_inputs == other.num_inputs &&
|
||||||
|
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
|
||||||
|
other.inputs.begin() + num_inputs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -141,11 +143,15 @@ struct RuntimeInfo {
|
||||||
u32 num_user_data;
|
u32 num_user_data;
|
||||||
u32 num_input_vgprs;
|
u32 num_input_vgprs;
|
||||||
u32 num_allocated_vgprs;
|
u32 num_allocated_vgprs;
|
||||||
ExportRuntimeInfo es_info;
|
AmdGpu::FpDenormMode fp_denorm_mode32;
|
||||||
VertexRuntimeInfo vs_info;
|
AmdGpu::FpRoundMode fp_round_mode32;
|
||||||
GeometryRuntimeInfo gs_info;
|
union {
|
||||||
FragmentRuntimeInfo fs_info;
|
ExportRuntimeInfo es_info;
|
||||||
ComputeRuntimeInfo cs_info;
|
VertexRuntimeInfo vs_info;
|
||||||
|
GeometryRuntimeInfo gs_info;
|
||||||
|
FragmentRuntimeInfo fs_info;
|
||||||
|
ComputeRuntimeInfo cs_info;
|
||||||
|
};
|
||||||
|
|
||||||
RuntimeInfo(Stage stage_) : stage{stage_} {}
|
RuntimeInfo(Stage stage_) : stage{stage_} {}
|
||||||
|
|
||||||
|
|
|
@ -92,6 +92,12 @@ struct Liverpool {
|
||||||
union {
|
union {
|
||||||
BitField<0, 6, u64> num_vgprs;
|
BitField<0, 6, u64> num_vgprs;
|
||||||
BitField<6, 4, u64> num_sgprs;
|
BitField<6, 4, u64> num_sgprs;
|
||||||
|
BitField<10, 2, u64> priority;
|
||||||
|
BitField<12, 2, FpRoundMode> fp_round_mode32;
|
||||||
|
BitField<14, 2, FpRoundMode> fp_round_mode64;
|
||||||
|
BitField<16, 2, FpDenormMode> fp_denorm_mode32;
|
||||||
|
BitField<18, 2, FpDenormMode> fp_denorm_mode64;
|
||||||
|
BitField<12, 8, u64> float_mode;
|
||||||
BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
|
BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
|
||||||
BitField<33, 5, u64> num_user_regs;
|
BitField<33, 5, u64> num_user_regs;
|
||||||
} settings;
|
} settings;
|
||||||
|
|
|
@ -7,6 +7,20 @@
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
|
||||||
|
enum class FpRoundMode : u32 {
|
||||||
|
NearestEven = 0,
|
||||||
|
PlusInf = 1,
|
||||||
|
MinInf = 2,
|
||||||
|
ToZero = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class FpDenormMode : u32 {
|
||||||
|
InOutFlush = 0,
|
||||||
|
InAllowOutFlush = 1,
|
||||||
|
InFlushOutAllow = 2,
|
||||||
|
InOutAllow = 3,
|
||||||
|
};
|
||||||
|
|
||||||
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
||||||
enum class PrimitiveType : u32 {
|
enum class PrimitiveType : u32 {
|
||||||
None = 0,
|
None = 0,
|
||||||
|
|
|
@ -217,9 +217,10 @@ bool Instance::CreateDevice() {
|
||||||
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
||||||
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties,
|
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties,
|
||||||
vk::PhysicalDevicePushDescriptorPropertiesKHR>();
|
vk::PhysicalDevicePushDescriptorPropertiesKHR, vk::PhysicalDeviceVulkan12Properties>();
|
||||||
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
|
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
|
||||||
push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
|
push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
|
||||||
|
vk12_props = properties_chain.get<vk::PhysicalDeviceVulkan12Properties>();
|
||||||
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
|
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
|
||||||
|
|
||||||
features = feature_chain.get().features;
|
features = feature_chain.get().features;
|
||||||
|
|
|
@ -242,6 +242,11 @@ public:
|
||||||
return push_descriptor_props.maxPushDescriptors;
|
return push_descriptor_props.maxPushDescriptors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the vulkan 1.2 physical device properties.
|
||||||
|
const vk::PhysicalDeviceVulkan12Properties& GetVk12Properties() const noexcept {
|
||||||
|
return vk12_props;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if shaders can declare the ClipDistance attribute
|
/// Returns true if shaders can declare the ClipDistance attribute
|
||||||
bool IsShaderClipDistanceSupported() const {
|
bool IsShaderClipDistanceSupported() const {
|
||||||
return features.shaderClipDistance;
|
return features.shaderClipDistance;
|
||||||
|
@ -279,6 +284,7 @@ private:
|
||||||
vk::UniqueDevice device;
|
vk::UniqueDevice device;
|
||||||
vk::PhysicalDeviceProperties properties;
|
vk::PhysicalDeviceProperties properties;
|
||||||
vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
|
vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
|
||||||
|
vk::PhysicalDeviceVulkan12Properties vk12_props;
|
||||||
vk::PhysicalDeviceFeatures features;
|
vk::PhysicalDeviceFeatures features;
|
||||||
vk::DriverIdKHR driver_id;
|
vk::DriverIdKHR driver_id;
|
||||||
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
|
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
#include "shader_recompiler/frontend/copy_shader.h"
|
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/recompiler.h"
|
#include "shader_recompiler/recompiler.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
@ -41,7 +40,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
||||||
w != VsOutput::None) {
|
w != VsOutput::None) {
|
||||||
info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
info.outputs[info.num_outputs++] = Shader::VsOutputMap{x, y, z, w};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// VS_OUT_MISC_VEC
|
// VS_OUT_MISC_VEC
|
||||||
|
@ -84,18 +83,21 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||||
auto info = Shader::RuntimeInfo{stage};
|
auto info = Shader::RuntimeInfo{stage};
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
|
const auto BuildCommon = [&](const auto& program) {
|
||||||
|
info.num_user_data = program.settings.num_user_regs;
|
||||||
|
info.num_input_vgprs = program.settings.vgpr_comp_cnt;
|
||||||
|
info.num_allocated_vgprs = program.settings.num_vgprs * 4;
|
||||||
|
info.fp_denorm_mode32 = program.settings.fp_denorm_mode32;
|
||||||
|
info.fp_round_mode32 = program.settings.fp_round_mode32;
|
||||||
|
};
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case Shader::Stage::Export: {
|
case Shader::Stage::Export: {
|
||||||
info.num_user_data = regs.es_program.settings.num_user_regs;
|
BuildCommon(regs.es_program);
|
||||||
info.num_input_vgprs = regs.es_program.settings.vgpr_comp_cnt;
|
|
||||||
info.num_allocated_vgprs = regs.es_program.settings.num_vgprs * 4;
|
|
||||||
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Vertex: {
|
case Shader::Stage::Vertex: {
|
||||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
BuildCommon(regs.vs_program);
|
||||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
|
||||||
info.num_allocated_vgprs = regs.vs_program.settings.num_vgprs * 4;
|
|
||||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||||
info.vs_info.emulate_depth_negative_one_to_one =
|
info.vs_info.emulate_depth_negative_one_to_one =
|
||||||
!instance.IsDepthClipControlSupported() &&
|
!instance.IsDepthClipControlSupported() &&
|
||||||
|
@ -103,39 +105,35 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Geometry: {
|
case Shader::Stage::Geometry: {
|
||||||
info.num_user_data = regs.gs_program.settings.num_user_regs;
|
BuildCommon(regs.gs_program);
|
||||||
info.num_input_vgprs = regs.gs_program.settings.vgpr_comp_cnt;
|
auto& gs_info = info.gs_info;
|
||||||
info.num_allocated_vgprs = regs.gs_program.settings.num_vgprs * 4;
|
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||||
info.gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
gs_info.num_invocations =
|
||||||
info.gs_info.num_invocations =
|
|
||||||
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
|
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
|
||||||
info.gs_info.in_primitive = regs.primitive_type;
|
gs_info.in_primitive = regs.primitive_type;
|
||||||
for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
|
for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
|
||||||
info.gs_info.out_primitive[stream_id] =
|
gs_info.out_primitive[stream_id] =
|
||||||
regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
|
regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
|
||||||
}
|
}
|
||||||
info.gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||||
info.gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
|
gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
|
||||||
|
const auto params_vc = Liverpool::GetParams(regs.vs_program);
|
||||||
// Extract semantics offsets from a copy shader
|
gs_info.vs_copy = params_vc.code;
|
||||||
const auto vc_stage = Shader::Stage::Vertex;
|
gs_info.vs_copy_hash = params_vc.hash;
|
||||||
const auto* pgm_vc = regs.ProgramForStage(static_cast<u32>(vc_stage));
|
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
|
||||||
const auto params_vc = Liverpool::GetParams(*pgm_vc);
|
|
||||||
DumpShader(params_vc.code, params_vc.hash, Shader::Stage::Vertex, 0, "copy.bin");
|
|
||||||
info.gs_info.copy_data = Shader::ParseCopyShader(params_vc.code);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Fragment: {
|
case Shader::Stage::Fragment: {
|
||||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
BuildCommon(regs.ps_program);
|
||||||
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
|
|
||||||
const auto& ps_inputs = regs.ps_inputs;
|
const auto& ps_inputs = regs.ps_inputs;
|
||||||
|
info.fs_info.num_inputs = regs.num_interp;
|
||||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||||
info.fs_info.inputs.push_back({
|
info.fs_info.inputs[i] = {
|
||||||
.param_index = u8(ps_inputs[i].input_offset.Value()),
|
.param_index = u8(ps_inputs[i].input_offset.Value()),
|
||||||
.is_default = bool(ps_inputs[i].use_default),
|
.is_default = bool(ps_inputs[i].use_default),
|
||||||
.is_flat = bool(ps_inputs[i].flat_shade),
|
.is_flat = bool(ps_inputs[i].flat_shade),
|
||||||
.default_value = u8(ps_inputs[i].default_value),
|
.default_value = u8(ps_inputs[i].default_value),
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||||
info.fs_info.color_buffers[i] = {
|
info.fs_info.color_buffers[i] = {
|
||||||
|
@ -166,9 +164,12 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||||
AmdGpu::Liverpool* liverpool_)
|
AmdGpu::Liverpool* liverpool_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
||||||
desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} {
|
desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} {
|
||||||
|
const auto& vk12_props = instance.GetVk12Properties();
|
||||||
profile = Shader::Profile{
|
profile = Shader::Profile{
|
||||||
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
|
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
|
||||||
.subgroup_size = instance.SubgroupSize(),
|
.subgroup_size = instance.SubgroupSize(),
|
||||||
|
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
|
||||||
|
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
||||||
.support_explicit_workgroup_layout = true,
|
.support_explicit_workgroup_layout = true,
|
||||||
};
|
};
|
||||||
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue