diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 787aba251..ceb915f6a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -287,7 +287,7 @@ jobs: sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -309,7 +309,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -348,7 +348,7 @@ jobs: sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -370,7 +370,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -395,7 +395,7 @@ jobs: submodules: recursive - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -417,7 +417,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -431,7 +431,7 @@ jobs: submodules: recursive - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -453,7 +453,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b3dc0b67..97a223d06 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -844,6 +844,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp + src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp diff --git a/src/core/devtools/layer.cpp b/src/core/devtools/layer.cpp index 94b39e801..a93178de5 100644 --- a/src/core/devtools/layer.cpp +++ b/src/core/devtools/layer.cpp @@ -1,11 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "SDL3/SDL_log.h" #include "layer.h" #include +#include "SDL3/SDL_log.h" #include "common/config.h" #include "common/singleton.h" #include "common/types.h" diff --git a/src/core/devtools/options.cpp b/src/core/devtools/options.cpp index 2def42071..f4b0ceb9a 100644 --- a/src/core/devtools/options.cpp +++ b/src/core/devtools/options.cpp @@ -1,9 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "options.h" + +#include #include -#include "options.h" +#include "video_core/renderer_vulkan/vk_presenter.h" + +extern std::unique_ptr presenter; namespace Core::Devtools { @@ -12,6 +17,7 @@ TOptions Options; void LoadOptionsConfig(const char* line) { char str[512]; int i; + float f; if (sscanf(line, "disassembler_cli_isa=%511[^\n]", str) == 1) { Options.disassembler_cli_isa = str; return; @@ -24,12 +30,26 @@ void LoadOptionsConfig(const char* line) { Options.frame_dump_render_on_collapse = i != 0; return; } + if (sscanf(line, "fsr_enabled=%d", &i) == 1) { + presenter->GetFsrSettingsRef().enable = i != 0; + return; + } + if (sscanf(line, "fsr_rcas_enabled=%d", &i) == 1) { + presenter->GetFsrSettingsRef().use_rcas = i != 0; + return; + } + if (sscanf(line, "fsr_rcas_attenuation=%f", &f) == 1) { + presenter->GetFsrSettingsRef().rcas_attenuation = f; + } } void SerializeOptionsConfig(ImGuiTextBuffer* buf) { buf->appendf("disassembler_cli_isa=%s\n", Options.disassembler_cli_isa.c_str()); buf->appendf("disassembler_cli_spv=%s\n", Options.disassembler_cli_spv.c_str()); buf->appendf("frame_dump_render_on_collapse=%d\n", Options.frame_dump_render_on_collapse); + buf->appendf("fsr_enabled=%d\n", presenter->GetFsrSettingsRef().enable); + buf->appendf("fsr_rcas_enabled=%d\n", presenter->GetFsrSettingsRef().use_rcas); + buf->appendf("fsr_rcas_attenuation=%f\n", presenter->GetFsrSettingsRef().rcas_attenuation); } } // namespace Core::Devtools diff --git a/src/core/libraries/ime/ime_dialog.h b/src/core/libraries/ime/ime_dialog.h index b726b8c99..526e5f022 100644 --- a/src/core/libraries/ime/ime_dialog.h +++ b/src/core/libraries/ime/ime_dialog.h @@ -13,7 +13,7 @@ class SymbolsResolver; namespace Libraries::ImeDialog { -constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 0x3FF; // 0x78; +constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 2048; enum class Error : u32 { OK = 0x0, diff --git a/src/imgui/imgui_texture.h b/src/imgui/imgui_texture.h index 1a38066d0..d84eda6b7 100644 --- a/src/imgui/imgui_texture.h +++ b/src/imgui/imgui_texture.h @@ -4,8 +4,11 @@ #pragma once #include +#include #include +#include "common/types.h" + namespace ImGui { namespace Core::TextureManager { diff --git a/src/imgui/renderer/imgui_core.cpp b/src/imgui/renderer/imgui_core.cpp index 53471e669..120c3d0ec 100644 --- a/src/imgui/renderer/imgui_core.cpp +++ b/src/imgui/renderer/imgui_core.cpp @@ -149,6 +149,8 @@ void Initialize(const ::Vulkan::Instance& instance, const Frontend::WindowSDL& w if (const auto dpi = SDL_GetWindowDisplayScale(window.GetSDLWindow()); dpi > 0.0f) { GetIO().FontGlobalScale = dpi; } + + std::at_quick_exit([] { SaveIniSettingsToDisk(GetIO().IniFilename); }); } void OnResize() { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index c6ec65606..211899714 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -38,6 +38,7 @@ Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto e const Id ib_label = ctx.OpLabel(); const Id oob_label = ctx.OpLabel(); const Id end_label = ctx.OpLabel(); + ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone); ctx.OpBranchConditional(in_bounds, ib_label, oob_label); ctx.AddLabel(ib_label); const Id ib_result = emit_func(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 56a6abc05..43655ba3f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -64,10 +64,6 @@ Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -Id EmitBitCastU64F64(EmitContext& ctx, Id value) { - return ctx.OpBitcast(ctx.U64, value); -} - Id EmitBitCastF16U16(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F16[1], value); } @@ -76,10 +72,6 @@ Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitBitCastF64U64(EmitContext&) { - UNREACHABLE_MSG("SPIR-V Instruction"); -} - Id EmitPackUint2x32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U64, value); } @@ -88,10 +80,14 @@ Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[2], value); } -Id EmitPackFloat2x32(EmitContext& ctx, Id value) { +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F64[1], value); } +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + Id EmitPackUnorm2x16(EmitContext& ctx, Id value) { return ctx.OpPackUnorm2x16(ctx.U32[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 9b7528be8..079f1005d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -202,13 +202,12 @@ Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitBitCastU16F16(EmitContext& ctx, Id value); Id EmitBitCastU32F32(EmitContext& ctx, Id value); -Id EmitBitCastU64F64(EmitContext& ctx, Id value); Id EmitBitCastF16U16(EmitContext& ctx, Id value); Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& ctx); Id EmitPackUint2x32(EmitContext& ctx, Id value); Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x32(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); Id EmitPackUnorm2x16(EmitContext& ctx, Id value); Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value); Id EmitPackSnorm2x16(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 230f3917f..c5a5814a4 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -336,7 +336,7 @@ T Translator::GetSrc64(const InstOperand& operand) { const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); if constexpr (is_float) { - value = ir.PackFloat2x32(ir.CompositeConstruct(value_lo, value_hi)); + value = ir.PackDouble2x32(ir.CompositeConstruct(value_lo, value_hi)); } else { value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); } @@ -444,10 +444,9 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra value_untyped = ir.FPSaturate(value_raw); } } - const IR::U64 value = - is_float ? ir.BitCast(IR::F64{value_untyped}) : IR::U64{value_untyped}; - const IR::Value unpacked{ir.UnpackUint2x32(value)}; + const IR::Value unpacked{is_float ? ir.UnpackDouble2x32(IR::F64{value_untyped}) + : ir.UnpackUint2x32(IR::U64{value_untyped})}; const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)}; const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)}; switch (operand.field) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e8836bb4c..e1ebf2206 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -84,16 +84,6 @@ IR::F16 IREmitter::BitCast(const IR::U16& value) { return Inst(Opcode::BitCastF16U16, value); } -template <> -IR::U64 IREmitter::BitCast(const IR::F64& value) { - return Inst(Opcode::BitCastU64F64, value); -} - -template <> -IR::F64 IREmitter::BitCast(const IR::U64& value) { - return Inst(Opcode::BitCastF64U64, value); -} - U1 IREmitter::ConditionRef(const U1& value) { return Inst(Opcode::ConditionRef, value); } @@ -841,8 +831,12 @@ Value IREmitter::UnpackUint2x32(const U64& value) { return Inst(Opcode::UnpackUint2x32, value); } -F64 IREmitter::PackFloat2x32(const Value& vector) { - return Inst(Opcode::PackFloat2x32, vector); +F64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst(Opcode::PackDouble2x32, vector); +} + +Value IREmitter::UnpackDouble2x32(const F64& value) { + return Inst(Opcode::UnpackDouble2x32, value); } U32 IREmitter::Pack2x16(const AmdGpu::NumberFormat number_format, const Value& vector) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 186d83a07..d978b3b4f 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -181,7 +181,8 @@ public: [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); - [[nodiscard]] F64 PackFloat2x32(const Value& vector); + [[nodiscard]] F64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const F64& value); [[nodiscard]] U32 Pack2x16(AmdGpu::NumberFormat number_format, const Value& vector); [[nodiscard]] Value Unpack2x16(AmdGpu::NumberFormat number_format, const U32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 4932ff9a0..6f186808c 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -191,14 +191,13 @@ OPCODE(SelectF64, F64, U1, // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) OPCODE(BitCastF16U16, F16, U16, ) OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) OPCODE(PackUint2x32, U64, U32x2, ) OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x32, F64, F32x2, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) OPCODE(PackUnorm2x16, U32, F32x2, ) OPCODE(UnpackUnorm2x16, F32x2, U32, ) diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 760dbb112..06e4ac850 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -21,6 +21,7 @@ void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerBufferFormatToRaw(IR::Program& program); +void LowerFp64ToFp32(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp index 3fdc6f0cd..658a495bc 100644 --- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp +++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp @@ -196,13 +196,18 @@ static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) const auto buffer{desc.GetSharp(info)}; const auto is_inst_typed = flags.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid; - const auto data_format = is_inst_typed ? flags.inst_data_fmt.Value() : buffer.GetDataFmt(); - const auto num_format = is_inst_typed ? flags.inst_num_fmt.Value() : buffer.GetNumberFmt(); + const auto data_format = + is_inst_typed ? AmdGpu::RemapDataFormat(flags.inst_data_fmt.Value()) : buffer.GetDataFmt(); const auto format_info = FormatInfo{ .data_format = data_format, - .num_format = num_format, - .swizzle = is_inst_typed ? AmdGpu::IdentityMapping : buffer.DstSelect(), - .num_conversion = AmdGpu::MapNumberConversion(num_format), + .num_format = is_inst_typed + ? AmdGpu::RemapNumberFormat(flags.inst_num_fmt.Value(), data_format) + : buffer.GetNumberFmt(), + .swizzle = is_inst_typed + ? AmdGpu::RemapSwizzle(flags.inst_data_fmt.Value(), AmdGpu::IdentityMapping) + : buffer.DstSelect(), + .num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value()) + : buffer.GetNumberConversion(), .num_components = AmdGpu::NumComponents(data_format), }; diff --git a/src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp new file mode 100644 index 000000000..3c30e75b4 --- /dev/null +++ b/src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +constexpr s32 F64ToF32Exp = +1023 - 127; +constexpr s32 F32ToF64Exp = +127 - 1023; + +static IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) { + const IR::U32 lo{ir.CompositeExtract(packed, 0)}; + const IR::U32 hi{ir.CompositeExtract(packed, 1)}; + const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))}; + const IR::U32 mantissa{ + ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)}; + const IR::U32 result{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))}; + return ir.BitCast(result); +} + +IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) { + const IR::U32 value{ir.BitCast(IR::F32(raw))}; + const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))}; + const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)}; + const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))}; + const IR::U32 hi{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))}; + return ir.CompositeConstruct(lo, hi); +} + +static IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::CompositeConstructF64x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF64x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF64x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF64x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF64x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF64x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF64x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF64x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF64x4: + return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::CompositeShuffleF64x2: + return IR::Opcode::CompositeShuffleF32x2; + case IR::Opcode::CompositeShuffleF64x3: + return IR::Opcode::CompositeShuffleF32x3; + case IR::Opcode::CompositeShuffleF64x4: + return IR::Opcode::CompositeShuffleF32x4; + case IR::Opcode::SelectF64: + return IR::Opcode::SelectF64; + case IR::Opcode::FPAbs64: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd64: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPFma64: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMax64: + return IR::Opcode::FPMax32; + case IR::Opcode::FPMin64: + return IR::Opcode::FPMin32; + case IR::Opcode::FPMul64: + return IR::Opcode::FPMul32; + case IR::Opcode::FPDiv64: + return IR::Opcode::FPDiv32; + case IR::Opcode::FPNeg64: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRecip64: + return IR::Opcode::FPRecip32; + case IR::Opcode::FPRecipSqrt64: + return IR::Opcode::FPRecipSqrt32; + case IR::Opcode::FPSaturate64: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp64: + return IR::Opcode::FPClamp32; + case IR::Opcode::FPRoundEven64: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPFloor64: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPCeil64: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPTrunc64: + return IR::Opcode::FPTrunc32; + case IR::Opcode::FPFract64: + return IR::Opcode::FPFract32; + case IR::Opcode::FPFrexpSig64: + return IR::Opcode::FPFrexpSig32; + case IR::Opcode::FPFrexpExp64: + return IR::Opcode::FPFrexpExp32; + case IR::Opcode::FPOrdEqual64: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual64: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual64: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual64: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan64: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan64: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan64: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan64: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual64: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual64: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual64: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual64: + return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan64: + return IR::Opcode::FPIsNan32; + case IR::Opcode::FPIsInf64: + return IR::Opcode::FPIsInf32; + case IR::Opcode::ConvertS32F64: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertF32F64: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64F32: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF64U32: + return IR::Opcode::ConvertF32U32; + default: + return op; + } +} + +static void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0))); + break; + } + case IR::Opcode::UnpackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0))); + break; + } + default: + inst.ReplaceOpcode(Replace(inst.GetOpcode())); + break; + } +} + +void LowerFp64ToFp32(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 219378a6c..d739b2da5 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -44,7 +44,8 @@ void Visit(Info& info, const IR::Inst& inst) { case IR::Opcode::BitCastF16U16: info.uses_fp16 = true; break; - case IR::Opcode::BitCastU64F64: + case IR::Opcode::PackDouble2x32: + case IR::Opcode::UnpackDouble2x32: info.uses_fp64 = true; break; case IR::Opcode::ImageWrite: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3b2854d59..9aac6230a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,7 @@ struct Profile { bool support_int8{}; bool support_int16{}; bool support_int64{}; + bool support_float64{}; bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5004e0beb..3e0bd98d2 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -60,6 +60,9 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + if (!profile.support_float64) { + Shader::Optimization::LowerFp64ToFp32(program); + } Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); diff --git a/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp b/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp index 0c40ffd7a..73dd3a7b5 100644 --- a/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp +++ b/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp @@ -14,7 +14,7 @@ namespace Vulkan::HostPasses { -void PostProcessingPass::Create(vk::Device device) { +void PostProcessingPass::Create(vk::Device device, const vk::Format surface_format) { static const std::array pp_shaders{ HostShaders::FS_TRI_VERT, HostShaders::POST_PROCESS_FRAG, @@ -76,7 +76,7 @@ void PostProcessingPass::Create(vk::Device device) { Check<"create pp pipeline layout">(device.createPipelineLayoutUnique(layout_info)); const std::array pp_color_formats{ - vk::Format::eB8G8R8A8Unorm, // swapchain.GetSurfaceFormat().format, + surface_format, }; const vk::PipelineRenderingCreateInfo pipeline_rendering_ci{ .colorAttachmentCount = pp_color_formats.size(), diff --git a/src/video_core/renderer_vulkan/host_passes/pp_pass.h b/src/video_core/renderer_vulkan/host_passes/pp_pass.h index 6127bb5c1..f95c02e8d 100644 --- a/src/video_core/renderer_vulkan/host_passes/pp_pass.h +++ b/src/video_core/renderer_vulkan/host_passes/pp_pass.h @@ -19,7 +19,7 @@ public: u32 hdr = 0; }; - void Create(vk::Device device); + void Create(vk::Device device, vk::Format surface_format); void Render(vk::CommandBuffer cmdbuf, vk::ImageView input, vk::Extent2D input_size, Frame& output, Settings settings); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d33a1607b..072807124 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -332,6 +332,7 @@ bool Instance::CreateDevice() { .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, + .depthBounds = features.depthBounds, .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, .vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index b3f3e60b6..bf9af1f24 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -89,6 +89,11 @@ public: return features.depthBounds; } + /// Returns true if 64-bit floats are supported in shaders + bool IsShaderFloat64Supported() const { + return features.shaderFloat64; + } + /// Returns true when VK_EXT_custom_border_color is supported bool IsCustomBorderColorSupported() const { return custom_border_color; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index efb1966ba..0b991cda0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -196,6 +196,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, profile = Shader::Profile{ .supported_spirv = SpirvVersion1_6, .subgroup_size = instance.SubgroupSize(), + .support_float64 = instance.IsShaderFloat64Supported(), .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32), diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 4a6a5c7c2..6bd4b26fa 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -130,7 +130,7 @@ Presenter::Presenter(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_ } fsr_pass.Create(device, instance.GetAllocator(), num_images); - pp_pass.Create(device); + pp_pass.Create(device, swapchain.GetSurfaceFormat().format); ImGui::Layer::AddLayer(Common::Singleton::Instance()); }