spirv: Flush denormals if possible (#1302)

This commit is contained in:
TheTurtle 2024-10-10 17:47:39 +03:00 committed by GitHub
parent 56e8ed7833
commit 100036aecf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 130 additions and 98 deletions

View file

@ -206,10 +206,7 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
return main;
}
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const auto& info = program.info;
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
void SetupCapabilities(const Info& info, EmitContext& ctx) {
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::ImageQuery);
@ -247,6 +244,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
if (info.uses_group_ballot) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
}
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if (info.stage == Stage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry);
}
}
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const auto& info = program.info;
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.info.stage) {
case Stage::Compute: {
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
@ -290,6 +300,24 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo& runtime_info,
Id main_func) {
ctx.AddExtension("SPV_KHR_float_controls");
const auto fp_denorm_mode = runtime_info.fp_denorm_mode32;
if (fp_denorm_mode == AmdGpu::FpDenormMode::InOutFlush) {
if (profile.support_fp32_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
}
} else {
LOG_WARNING(Render_Vulkan, "Unknown FP denorm mode {}", u32(fp_denorm_mode));
}
const auto fp_round_mode = runtime_info.fp_round_mode32;
if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) {
LOG_WARNING(Render_Vulkan, "Unknown FP rounding mode {}", u32(fp_round_mode));
}
}
void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
auto inst{program.blocks.front()->begin()};
size_t block_index{0};
@ -314,18 +342,8 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
switch (program.info.stage) {
case Stage::Export:
case Stage::Vertex:
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
break;
case Stage::Geometry:
ctx.AddCapability(spv::Capability::Geometry);
break;
default:
break;
}
SetupCapabilities(program.info, ctx);
SetupFloatMode(ctx, profile, runtime_info, main);
PatchPhiNodes(program, ctx);
binding.user_data += program.info.ud_mask.NumRegs();
return ctx.Assemble();

View file

@ -284,7 +284,8 @@ void EmitContext::DefineInputs() {
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
for (const auto& input : runtime_info.fs_info.inputs) {
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i];
const u32 semantic = input.param_index;
ASSERT(semantic < IR::NumParams);
if (input.is_default && !input.is_flat) {
@ -333,7 +334,6 @@ void EmitContext::DefineInputs() {
const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u;
for (int param_id = 0; param_id < num_params; ++param_id) {
const IR::Attribute param{IR::Attribute::Param0 + param_id};
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
const Id id{DefineInput(type, param_id)};
Name(id, fmt::format("in_attr{}", param_id));
@ -394,8 +394,7 @@ void EmitContext::DefineOutputs() {
case Stage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 attr_id = 0; attr_id < runtime_info.gs_info.copy_data.num_attrs; attr_id++) {
const IR::Attribute param{IR::Attribute::Param0 + attr_id};
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
const Id id{DefineOutput(F32[4], attr_id)};
Name(id, fmt::format("out_attr{}", attr_id));
output_params[attr_id] = {id, output_f32, F32[1], 4u};

View file

@ -7,7 +7,7 @@
namespace Shader {
CopyShaderData ParseCopyShader(const std::span<const u32>& code) {
CopyShaderData ParseCopyShader(std::span<const u32> code) {
Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()};
Gcn::GcnDecodeContext decoder;

View file

@ -16,6 +16,6 @@ struct CopyShaderData {
u32 num_attrs{0};
};
CopyShaderData ParseCopyShader(const std::span<const u32>& code);
CopyShaderData ParseCopyShader(std::span<const u32> code);
} // namespace Shader

View file

@ -3,12 +3,12 @@
#pragma once
#include <span>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
@ -170,6 +170,8 @@ struct Info {
};
UserDataMask ud_mask{};
CopyShaderData gs_copy_data;
s8 vertex_offset_sgpr = -1;
s8 instance_offset_sgpr = -1;

View file

@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
@ -11,6 +11,8 @@ namespace Shader::Optimization {
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage) {
auto& info = program.info;
const auto& ForEachInstruction = [&](auto func) {
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
@ -52,6 +54,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
break;
}
case Stage::Geometry: {
const auto& gs_info = runtime_info.gs_info;
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
@ -81,12 +86,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u;
const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size;
const auto comp_ofs = gs_info.output_vertices * 4u;
const auto output_size = comp_ofs * gs_info.out_vertex_data_size;
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
const auto& it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs);
ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend());
const auto& it = info.gs_copy_data.attr_map.find(vc_read_ofs);
ASSERT(it != info.gs_copy_data.attr_map.cend());
const auto& [attr, comp] = it->second;
inst.ReplaceOpcode(IR::Opcode::SetAttribute);

View file

@ -10,20 +10,6 @@
namespace Shader::IR {
enum class FpRoundMode : u32 {
NearestEven = 0,
PlusInf = 1,
MinInf = 2,
ToZero = 3,
};
enum class FpDenormMode : u32 {
InOutFlush = 0,
InAllowOutFlush = 1,
InFlushOutAllow = 2,
InOutAllow = 3,
};
enum class FloatClassFunc : u32 {
SignalingNan = 1 << 0,
QuietNan = 1 << 1,
@ -41,13 +27,6 @@ enum class FloatClassFunc : u32 {
};
DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
union Mode {
BitField<0, 4, FpRoundMode> fp_round;
BitField<4, 2, FpDenormMode> fp_denorm_single;
BitField<6, 2, FpDenormMode> fp_denorm_double;
BitField<8, 1, u32> dx10_clamp;
};
union TextureInstInfo {
u32 raw;
BitField<0, 1, u32> is_depth;

View file

@ -19,13 +19,8 @@ struct Profile {
bool support_float_controls{};
bool support_separate_denorm_behavior{};
bool support_separate_rounding_mode{};
bool support_fp16_denorm_preserve{};
bool support_fp32_denorm_preserve{};
bool support_fp16_denorm_flush{};
bool support_fp32_denorm_flush{};
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
bool support_explicit_workgroup_layout{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};

View file

@ -4,11 +4,9 @@
#pragma once
#include <algorithm>
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "frontend/copy_shader.h"
#include "video_core/amdgpu/types.h"
namespace Shader {
@ -62,7 +60,8 @@ enum class VsOutput : u8 {
using VsOutputMap = std::array<VsOutput, 4>;
struct VertexRuntimeInfo {
boost::container::static_vector<VsOutputMap, 3> outputs;
u32 num_outputs;
std::array<VsOutputMap, 3> outputs;
bool emulate_depth_negative_one_to_one{};
bool operator==(const VertexRuntimeInfo& other) const noexcept {
@ -79,13 +78,13 @@ struct GeometryRuntimeInfo {
u32 out_vertex_data_size{};
AmdGpu::PrimitiveType in_primitive;
GsOutputPrimTypes out_primitive;
CopyShaderData copy_data;
std::span<const u32> vs_copy;
u64 vs_copy_hash;
bool operator==(const GeometryRuntimeInfo& other) const noexcept {
return num_invocations && other.num_invocations &&
output_vertices == other.output_vertices && in_primitive == other.in_primitive &&
std::ranges::equal(out_primitive, other.out_primitive) &&
std::ranges::equal(copy_data.attr_map, other.copy_data.attr_map);
std::ranges::equal(out_primitive, other.out_primitive);
}
};
@ -106,7 +105,8 @@ struct FragmentRuntimeInfo {
auto operator<=>(const PsInput&) const noexcept = default;
};
boost::container::static_vector<PsInput, 32> inputs;
u32 num_inputs;
std::array<PsInput, 32> inputs;
struct PsColorBuffer {
AmdGpu::NumberFormat num_format;
MrtSwizzle mrt_swizzle;
@ -117,7 +117,9 @@ struct FragmentRuntimeInfo {
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) &&
std::ranges::equal(inputs, other.inputs);
num_inputs == other.num_inputs &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
other.inputs.begin() + num_inputs);
}
};
@ -141,11 +143,15 @@ struct RuntimeInfo {
u32 num_user_data;
u32 num_input_vgprs;
u32 num_allocated_vgprs;
ExportRuntimeInfo es_info;
VertexRuntimeInfo vs_info;
GeometryRuntimeInfo gs_info;
FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info;
AmdGpu::FpDenormMode fp_denorm_mode32;
AmdGpu::FpRoundMode fp_round_mode32;
union {
ExportRuntimeInfo es_info;
VertexRuntimeInfo vs_info;
GeometryRuntimeInfo gs_info;
FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info;
};
RuntimeInfo(Stage stage_) : stage{stage_} {}