diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c40f0a8f..de8753dbe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -419,7 +419,10 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/profile.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h + src/shader_recompiler/info.h + src/shader_recompiler/params.h src/shader_recompiler/runtime_info.h + src/shader_recompiler/specialization.h src/shader_recompiler/backend/spirv/emit_spirv.cpp src/shader_recompiler/backend/spirv/emit_spirv.h src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -533,8 +536,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp src/video_core/renderer_vulkan/vk_scheduler.h - src/video_core/renderer_vulkan/vk_shader_cache.cpp - src/video_core/renderer_vulkan/vk_shader_cache.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -647,13 +648,18 @@ target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::to target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3) if (APPLE) + option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF) + if (USE_SYSTEM_VULKAN_LOADER) + target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1) + else() + # Link MoltenVK for Vulkan support + find_library(MOLTENVK MoltenVK REQUIRED) + target_link_libraries(shadps4 PRIVATE ${MOLTENVK}) + endif() + # Reserve system-managed memory space. target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000) - # Link MoltenVK for Vulkan support - find_library(MOLTENVK MoltenVK REQUIRED) - target_link_libraries(shadps4 PRIVATE ${MOLTENVK}) - # Replacement for std::chrono::time_zone target_link_libraries(shadps4 PRIVATE date::date-tz) @@ -737,3 +743,10 @@ if (ENABLE_QT_GUI) set_source_files_properties(src/images/shadPS4.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources) endif() + +if (UNIX AND NOT APPLE) + if (ENABLE_QT_GUI) + find_package(OpenSSL REQUIRED) + target_link_libraries(shadps4 PRIVATE ${OPENSSL_LIBRARIES}) + endif() +endif() diff --git a/README.md b/README.md index c99142c78..db898e565 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ To get the latest news, go to our [**X (Twitter)**](https://x.com/shadps4) or ou # Status > [!IMPORTANT] -> shadPS4 is early in developement, don't expect a flawless experience. +> shadPS4 is early in development, don't expect a flawless experience. Currently, the emulator successfully runs small games like [**Sonic Mania**](https://www.youtube.com/watch?v=AAHoNzhHyCU), [**Undertale**](https://youtu.be/5zIvdy65Ro4) and it can even *somewhat* run [**Bloodborne**](https://www.youtube.com/watch?v=wC6s0avpQRE). @@ -65,6 +65,12 @@ Check the build instructions for [**Windows**](https://github.com/shadps4-emu/sh Check the build instructions for [**Linux**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/building-linux.md). +## macOS + +Check the build instructions for [**macOS**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/building-macos.md). + +Note that macOS users need at least macOS 15 on an Apple Silicon Mac, or at least macOS 11 on an Intel Mac. + ## Building status
diff --git a/documents/building-macos.md b/documents/building-macos.md new file mode 100644 index 000000000..d8cc414e2 --- /dev/null +++ b/documents/building-macos.md @@ -0,0 +1,73 @@ + + +## Build shadPS4 for macOS + +### Install the necessary tools to build shadPS4: + +First, make sure you have **Xcode 16.0 or newer** installed. + +For installing other tools and library dependencies we will be using [Homebrew](https://brew.sh/). + +On an ARM system, we will need the native ARM Homebrew to install tools and x86_64 Homebrew to install libraries. + +First, install native Homebrew and tools: +``` +# Installs native Homebrew to /opt/homebrew +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +# Adds Homebrew to your path +echo 'eval $(/opt/homebrew/bin/brew shellenv)' >> ~/.zprofile +eval $(/opt/homebrew/bin/brew shellenv) +# Installs tools. +brew install clang-format cmake +``` + +Next, install x86_64 Homebrew and libraries. + +**If you are on an ARM Mac:** +``` +# Installs x86_64 Homebrew to /usr/local +arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +# Installs libraries. +arch -x86_64 /usr/local/bin/brew install molten-vk qt@6 +``` + +**If you are on an x86_64 Mac:** +``` +brew install molten-vk qt@6 +``` + +If you don't need the Qt GUI you can remove `qt@6` from the last command. + +### Cloning and compiling: + +Clone the repository recursively: +``` +git clone --recursive https://github.com/shadps4-emu/shadPS4.git +cd shadPS4 +``` + +Generate the build directory in the shadPS4 directory: +``` +cmake -S . -B build/ -DCMAKE_OSX_ARCHITECTURES=x86_64 +``` + +If you want to build the Qt GUI, add `-DENABLE_QT_GUI=ON` to the end of this command as well. + +Enter the directory: +``` +cd build/ +``` + +Use make to build the project: +``` +cmake --build . --parallel$(sysctl -n hw.ncpu) +``` + +Now run the emulator: + +``` +./shadps4 /"PATH"/"TO"/"GAME"/"FOLDER"/eboot.bin +``` diff --git a/documents/building-windows.md b/documents/building-windows.md index 21fd87154..fb1bb93ca 100644 --- a/documents/building-windows.md +++ b/documents/building-windows.md @@ -6,7 +6,7 @@ SPDX-License-Identifier: GPL-2.0-or-later # Build shadPS4 for Windows This tutorial reads as if you have none of the prerequisites already installed. If you do, just ignore the steps regarding installation. -If you are building to contribute to the project, please omit `--depth 1` from the git invokations. +If you are building to contribute to the project, please omit `--depth 1` from the git invocations. Note: **ARM64 is not supported!** As of writing, it will not build nor run. The instructions with respect to ARM64 are for developers only. @@ -15,6 +15,7 @@ Note: **ARM64 is not supported!** As of writing, it will not build nor run. The ### (Prerequisite) Download the Community edition from [**Visual Studio 2022**](https://visualstudio.microsoft.com/vs/) Once you are within the installer: + 1. Select `Desktop development with C++` 2. Go to "Individual Components" tab 3. Search and select `C++ Clang Compiler for Windows` and `MSBuild support for LLVM` @@ -30,11 +31,12 @@ Beware, this requires you to create a Qt account. If you do not want to do this, Go through the installation normally. If you know what you are doing, you may unselect individual components that eat up too much disk space. 2. Download and install [Qt Visual Studio Tools](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2022) - + Once you are finished, you will have to configure Qt within Visual Studio: + 1. Tools -> Options -> Qt -> Versions -2. Add a new Qt version and navigate it to the correct folder. Should look like so: `C:\Qt\6.7.2\msvc2019_64` -3. Enable the default checkmark on the new version you just created. +2. Add a new Qt version and navigate it to the correct folder. Should look like so: `C:\Qt\6.7.2\msvc2019_64` +3. Enable the default checkmark on the new version you just created. ### (Prerequisite) Download [**Git for Windows**](https://git-scm.com/download/win) @@ -62,7 +64,7 @@ Go through the Git for Windows installation as normal Your shadps4.exe will be in `c:\path\to\source\Build\x64-Clang-Release\` To automatically populate the necessary files to run shadPS4.exe, run in a command prompt or terminal: -`C:\Qt\6.7.2\msvc2019_64\bin\windeployqt.exe c:\path\to\shadps4.exe` +`C:\Qt\6.7.2\msvc2019_64\bin\windeployqt.exe "c:\path\to\shadps4.exe"` (Change Qt path if you've installed it to non-default path) ## Option 2: MSYS2/MinGW @@ -74,27 +76,35 @@ Go through the MSYS2 installation as normal If you are building to distribute, please omit `-DCMAKE_CXX_FLAGS="-O2 -march=native"` within the build configuration step. Normal x86-based computers, follow: + 1. Open "MSYS2 MINGW64" from your new applications 2. Run `pacman -Syu`, let it complete; -3. Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja mingw-w64-x86_64-qt6-base` +3. Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja mingw-w64-x86_64-ffmpeg` + 1. Optional (Qt only): run `pacman -S --needed mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-tools` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` -6. Run `cmake -S . -B build -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` +6. Run `cmake -S . -B build -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` + 1. Optional (Qt only): add `-DENABLE_QT_GUI=ON` 7. Run `cmake --build build` + 1. Optional (Qt only): run `windeployqt6 build/shadps4.exe` 8. To run the finished product, run `./build/shadPS4.exe` ARM64-based computers, follow: + 1. Open "MSYS2 CLANGARM64" from your new applications 2. Run `pacman -Syu`, let it complete; -3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-qt6-base` +3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` + 1. Optional (Qt only): run `pacman -S --needed mingw-w64-clang-aarch64-qt6-base mingw-w64-clang-aarch64-qt6-tools` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` -6. Run `cmake -S . -B build -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` +6. Run `cmake -S . -B build -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` + 1. Optional (Qt only): add `-DENABLE_QT_GUI=ON` 7. Run `cmake --build build` + 1. Optional (Qt only): run `windeployqt6 build/shadps4.exe` 8. To run the finished product, run `./build/shadPS4.exe` ## Note on MSYS2 builds These builds may not be easily copyable to people who do not also have a MSYS2 installation. If you want to distribute these builds, you need to copy over the correct DLLs into a distribution folder. -In order to run them, you must be within the MSYS2 shell environment. \ No newline at end of file +In order to run them, you must be within the MSYS2 shell environment. diff --git a/src/common/config.cpp b/src/common/config.cpp index 8d87ed3c3..fb6ee120a 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -30,7 +30,9 @@ static bool vkValidation = false; static bool vkValidationSync = false; static bool vkValidationGpu = false; static bool rdocEnable = false; -static bool rdocMarkersEnable = false; +static bool vkMarkers = false; +static bool vkCrashDiagnostic = false; + // Gui std::string settings_install_dir = ""; u32 main_window_geometry_x = 400; @@ -121,7 +123,7 @@ bool isRdocEnabled() { } bool isMarkersEnabled() { - return rdocMarkersEnable; + return vkMarkers; } u32 vblankDiv() { @@ -140,6 +142,14 @@ bool vkValidationGpuEnabled() { return vkValidationGpu; } +bool vkMarkersEnabled() { + return vkMarkers || vkCrashDiagnostic; // Crash diagnostic forces markers on +} + +bool vkCrashDiagnosticEnabled() { + return vkCrashDiagnostic; +} + void setGpuId(s32 selectedGpuId) { gpuId = selectedGpuId; } @@ -384,7 +394,8 @@ void load(const std::filesystem::path& path) { vkValidationSync = toml::find_or(vk, "validation_sync", false); vkValidationGpu = toml::find_or(vk, "validation_gpu", true); rdocEnable = toml::find_or(vk, "rdocEnable", false); - rdocMarkersEnable = toml::find_or(vk, "rdocMarkersEnable", false); + vkMarkers = toml::find_or(vk, "rdocMarkersEnable", false); + vkCrashDiagnostic = toml::find_or(vk, "crashDiagnostic", false); } if (data.contains("Debug")) { @@ -460,7 +471,8 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["validation_sync"] = vkValidationSync; data["Vulkan"]["validation_gpu"] = vkValidationGpu; data["Vulkan"]["rdocEnable"] = rdocEnable; - data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable; + data["Vulkan"]["rdocMarkersEnable"] = vkMarkers; + data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic; data["Debug"]["DebugDump"] = isDebugDump; data["GUI"]["theme"] = mw_themes; data["GUI"]["iconSize"] = m_icon_size; @@ -504,7 +516,11 @@ void setDefaultValues() { shouldDumpPM4 = false; vblankDivider = 1; vkValidation = false; + vkValidationSync = false; + vkValidationGpu = false; rdocEnable = false; + vkMarkers = false; + vkCrashDiagnostic = false; emulator_language = "en"; m_language = 1; gpuId = -1; diff --git a/src/common/config.h b/src/common/config.h index 11e7d8827..7e717fe71 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -31,7 +31,6 @@ bool copyGPUCmdBuffers(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); -bool isMarkersEnabled(); u32 vblankDiv(); void setDebugDump(bool enable); @@ -62,6 +61,8 @@ void setRdocEnabled(bool enable); bool vkValidationEnabled(); bool vkValidationSyncEnabled(); bool vkValidationGpuEnabled(); +bool vkMarkersEnabled(); +bool vkCrashDiagnosticEnabled(); // Gui void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h); diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 151d34986..f31ff18cb 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -212,31 +212,38 @@ static void RestoreRegisters(Xbyak::CodeGenerator& c, } /// Switches to the patch stack and stores all registers. -static void SaveContext(Xbyak::CodeGenerator& c) { +static void SaveContext(Xbyak::CodeGenerator& c, bool save_flags = false) { SaveStack(c); for (int reg = Xbyak::Operand::RAX; reg <= Xbyak::Operand::R15; reg++) { c.push(Xbyak::Reg64(reg)); } for (int reg = 0; reg <= 7; reg++) { - c.sub(rsp, 32); + c.lea(rsp, ptr[rsp - 32]); c.vmovdqu(ptr[rsp], Xbyak::Ymm(reg)); } + if (save_flags) { + c.pushfq(); + } } /// Restores all registers and restores the original stack. /// If the destination is a register, it is not restored to preserve the output. -static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst) { +static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst, + bool restore_flags = false) { + if (restore_flags) { + c.popfq(); + } for (int reg = 7; reg >= 0; reg--) { if ((!dst.isXMM() && !dst.isYMM()) || dst.getIdx() != reg) { c.vmovdqu(Xbyak::Ymm(reg), ptr[rsp]); } - c.add(rsp, 32); + c.lea(rsp, ptr[rsp + 32]); } for (int reg = Xbyak::Operand::R15; reg >= Xbyak::Operand::RAX; reg--) { if (!dst.isREG() || dst.getIdx() != reg) { c.pop(Xbyak::Reg64(reg)); } else { - c.add(rsp, 8); + c.lea(rsp, ptr[rsp + 8]); } } RestoreStack(c); @@ -307,9 +314,24 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat SaveRegisters(c, {scratch}); + // BLSI sets CF to zero if source is zero, otherwise it sets CF to one. + Xbyak::Label set_carry, clear_carry, end; + c.mov(scratch, *src); - c.neg(scratch); + c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise + c.jc(set_carry); + c.jmp(clear_carry); + + c.L(set_carry); c.and_(scratch, *src); + c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF + c.jmp(end); + + c.L(clear_carry); + c.and_(scratch, *src); + // We don't need to clear carry here since AND does that for us + + c.L(end); c.mov(dst, scratch); RestoreRegisters(c, {scratch}); @@ -323,9 +345,26 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener SaveRegisters(c, {scratch}); + Xbyak::Label set_carry, clear_carry, end; + + // BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one. c.mov(scratch, *src); + c.test(scratch, scratch); + c.jz(set_carry); + c.jmp(clear_carry); + + c.L(set_carry); c.dec(scratch); c.xor_(scratch, *src); + c.stc(); + c.jmp(end); + + c.L(clear_carry); + c.dec(scratch); + c.xor_(scratch, *src); + // We don't need to clear carry here since XOR does that for us + + c.L(end); c.mov(dst, scratch); RestoreRegisters(c, {scratch}); @@ -339,9 +378,26 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat SaveRegisters(c, {scratch}); + Xbyak::Label set_carry, clear_carry, end; + + // BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one. c.mov(scratch, *src); + c.test(scratch, scratch); + c.jz(set_carry); + c.jmp(clear_carry); + + c.L(set_carry); c.dec(scratch); c.and_(scratch, *src); + c.stc(); + c.jmp(end); + + c.L(clear_carry); + c.dec(scratch); + c.and_(scratch, *src); + // We don't need to clear carry here since AND does that for us + + c.L(end); c.mov(dst, scratch); RestoreRegisters(c, {scratch}); @@ -361,7 +417,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe const auto float_count = dst.getBit() / 32; const auto byte_count = float_count * 4; - SaveContext(c); + SaveContext(c, true); // Allocate stack space for outputs and load into first parameter. c.sub(rsp, byte_count); @@ -397,7 +453,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe } c.add(rsp, byte_count); - RestoreContext(c, dst); + RestoreContext(c, dst, true); } using SingleToHalfFloatConverter = half_float::half (*)(float); @@ -425,7 +481,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe const auto float_count = src.getBit() / 32; const auto byte_count = float_count * 4; - SaveContext(c); + SaveContext(c, true); if (dst->isXMM()) { // Allocate stack space for outputs and load into first parameter. @@ -472,7 +528,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe c.add(rsp, byte_count); } - RestoreContext(c, *dst); + RestoreContext(c, *dst, true); } static bool FilterRosetta2Only(const ZydisDecodedOperand*) { diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index a2ef94037..ffec70300 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -499,7 +499,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() { } void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { - LOG_INFO(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw); + LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw); if (gnm_vqid == 0) { return; @@ -2054,7 +2054,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, u32 flip_arg) { - LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx); + LOG_DEBUG(Lib_GnmDriver, "called [buf = {}]", buf_idx); auto* cmdbuf = dcb_gpu_addrs[count - 1]; const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4; @@ -2078,7 +2078,7 @@ int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() { s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes) { - LOG_INFO(Lib_GnmDriver, "called"); + LOG_DEBUG(Lib_GnmDriver, "called"); if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) { LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL"); @@ -2154,7 +2154,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() { } int PS4_SYSV_ABI sceGnmSubmitDone() { - LOG_INFO(Lib_GnmDriver, "called"); + LOG_DEBUG(Lib_GnmDriver, "called"); if (!liverpool->IsGpuIdle()) { submission_lock = true; } diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h index b48990bfb..205b2274f 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -56,7 +56,7 @@ struct OrbisVirtualQueryInfo { BitField<1, 1, u32> is_direct; BitField<2, 1, u32> is_stack; BitField<3, 1, u32> is_pooled; - BitField<4, 1, u32> is_commited; + BitField<4, 1, u32> is_committed; }; std::array name; }; diff --git a/src/core/libraries/system/userservice.cpp b/src/core/libraries/system/userservice.cpp index 8c48b3111..cd7a721c0 100644 --- a/src/core/libraries/system/userservice.cpp +++ b/src/core/libraries/system/userservice.cpp @@ -565,7 +565,7 @@ int PS4_SYSV_ABI sceUserServiceGetLoginFlag() { } s32 PS4_SYSV_ABI sceUserServiceGetLoginUserIdList(OrbisUserServiceLoginUserIdList* userIdList) { - LOG_INFO(Lib_UserService, "called"); + LOG_DEBUG(Lib_UserService, "called"); if (userIdList == nullptr) { LOG_ERROR(Lib_UserService, "user_id is null"); return ORBIS_USER_SERVICE_ERROR_INVALID_ARGUMENT; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index a6c1a7623..631f77732 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -140,8 +140,8 @@ s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode return ORBIS_VIDEO_OUT_ERROR_INVALID_INDEX; } - LOG_INFO(Lib_VideoOut, "bufferIndex = {}, flipMode = {}, flipArg = {}", bufferIndex, flipMode, - flipArg); + LOG_DEBUG(Lib_VideoOut, "bufferIndex = {}, flipMode = {}, flipArg = {}", bufferIndex, flipMode, + flipArg); if (!driver->SubmitFlip(port, bufferIndex, flipArg)) { LOG_ERROR(Lib_VideoOut, "Flip queue is full"); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index af7d92ff9..6a6b76644 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -421,16 +421,20 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, const auto& vma = it->second; info->start = vma.base; info->end = vma.base + vma.size; + info->offset = vma.phys_base; info->protection = static_cast(vma.prot); info->is_flexible.Assign(vma.type == VMAType::Flexible); info->is_direct.Assign(vma.type == VMAType::Direct); - info->is_commited.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved); + info->is_stack.Assign(vma.type == VMAType::Stack); + info->is_pooled.Assign(vma.type == VMAType::Pooled); + info->is_committed.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved); vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); ASSERT(dmem_it != dmem_map.end()); - info->offset = vma.phys_base; info->memory_type = dmem_it->second.memory_type; + } else { + info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION; } return ORBIS_OK; diff --git a/src/emulator.cpp b/src/emulator.cpp index 85a4d745a..9d1bb00d9 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -64,7 +64,8 @@ Emulator::Emulator() { LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); - LOG_INFO(Config, "Vulkan rdocMarkersEnable: {}", Config::isMarkersEnabled()); + LOG_INFO(Config, "Vulkan rdocMarkersEnable: {}", Config::vkMarkersEnabled()); + LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::vkCrashDiagnosticEnabled()); // Defer until after logging is initialized. memory = Core::Memory::Instance(); diff --git a/src/qt_gui/cheats_patches.cpp b/src/qt_gui/cheats_patches.cpp index f7326192c..1c30f7e92 100644 --- a/src/qt_gui/cheats_patches.cpp +++ b/src/qt_gui/cheats_patches.cpp @@ -147,13 +147,13 @@ void CheatsPatches::setupUI() { controlLayout->addWidget(downloadComboBox); QPushButton* downloadButton = new QPushButton(tr("Download Cheats")); - connect(downloadButton, &QPushButton::clicked, [=]() { + connect(downloadButton, &QPushButton::clicked, [this, downloadComboBox]() { QString source = downloadComboBox->currentData().toString(); downloadCheats(source, m_gameSerial, m_gameVersion, true); }); QPushButton* deleteCheatButton = new QPushButton(tr("Delete File")); - connect(deleteCheatButton, &QPushButton::clicked, [=]() { + connect(deleteCheatButton, &QPushButton::clicked, [this, CHEATS_DIR_QString]() { QStringListModel* model = qobject_cast(listView_selectFile->model()); if (!model) { return; @@ -232,7 +232,7 @@ void CheatsPatches::setupUI() { patchesControlLayout->addWidget(patchesComboBox); QPushButton* patchesButton = new QPushButton(tr("Download Patches")); - connect(patchesButton, &QPushButton::clicked, [=]() { + connect(patchesButton, &QPushButton::clicked, [this]() { QString selectedOption = patchesComboBox->currentData().toString(); downloadPatches(selectedOption, true); }); @@ -444,8 +444,8 @@ QCheckBox* CheatsPatches::findCheckBoxByName(const QString& name) { return nullptr; } -void CheatsPatches::downloadCheats(const QString& source, const QString& m_gameSerial, - const QString& m_gameVersion, const bool showMessageBox) { +void CheatsPatches::downloadCheats(const QString& source, const QString& gameSerial, + const QString& gameVersion, const bool showMessageBox) { QDir dir(Common::FS::GetUserPath(Common::FS::PathType::CheatsDir)); if (!dir.exists()) { dir.mkpath("."); @@ -455,7 +455,7 @@ void CheatsPatches::downloadCheats(const QString& source, const QString& m_gameS if (source == "GoldHEN") { url = "https://raw.githubusercontent.com/GoldHEN/GoldHEN_Cheat_Repository/main/json.txt"; } else if (source == "wolf2022") { - url = "https://wolf2022.ir/trainer/" + m_gameSerial + "_" + m_gameVersion + ".json"; + url = "https://wolf2022.ir/trainer/" + gameSerial + "_" + gameVersion + ".json"; } else if (source == "shadPS4") { url = "https://raw.githubusercontent.com/shadps4-emu/ps4_cheats/main/" "CHEATS_JSON.txt"; @@ -468,7 +468,7 @@ void CheatsPatches::downloadCheats(const QString& source, const QString& m_gameS QNetworkRequest request(url); QNetworkReply* reply = manager->get(request); - connect(reply, &QNetworkReply::finished, [=]() { + connect(reply, &QNetworkReply::finished, [=, this]() { if (reply->error() == QNetworkReply::NoError) { QByteArray jsonData = reply->readAll(); bool foundFiles = false; @@ -476,7 +476,7 @@ void CheatsPatches::downloadCheats(const QString& source, const QString& m_gameS if (source == "GoldHEN" || source == "shadPS4") { QString textContent(jsonData); QRegularExpression regex( - QString("%1_%2[^=]*\.json").arg(m_gameSerial).arg(m_gameVersion)); + QString("%1_%2[^=]*\\.json").arg(gameSerial).arg(gameVersion)); QRegularExpressionMatchIterator matches = regex.globalMatch(textContent); QString baseUrl; @@ -519,7 +519,7 @@ void CheatsPatches::downloadCheats(const QString& source, const QString& m_gameS QNetworkRequest fileRequest(fileUrl); QNetworkReply* fileReply = manager->get(fileRequest); - connect(fileReply, &QNetworkReply::finished, [=]() { + connect(fileReply, &QNetworkReply::finished, [=, this]() { if (fileReply->error() == QNetworkReply::NoError) { QByteArray fileData = fileReply->readAll(); QFile localFile(localFilePath); @@ -669,7 +669,7 @@ void CheatsPatches::populateFileListPatches() { void CheatsPatches::downloadPatches(const QString repository, const bool showMessageBox) { QString url; if (repository == "GoldHEN") { - url = "https://github.com/GoldHEN/GoldHEN_Patch_Repository/tree/main/" + url = "https://github.com/illusion0001/PS4-PS5-Game-Patch/tree/main/" "patches/xml"; } if (repository == "shadPS4") { @@ -680,7 +680,7 @@ void CheatsPatches::downloadPatches(const QString repository, const bool showMes QNetworkRequest request(url); QNetworkReply* reply = manager->get(request); - connect(reply, &QNetworkReply::finished, [=]() { + connect(reply, &QNetworkReply::finished, [=, this]() { if (reply->error() == QNetworkReply::NoError) { QByteArray htmlData = reply->readAll(); reply->deleteLater(); @@ -713,8 +713,8 @@ void CheatsPatches::downloadPatches(const QString repository, const bool showMes if (fileName.endsWith(".xml")) { QString fileUrl; if (repository == "GoldHEN") { - fileUrl = QString("https://raw.githubusercontent.com/GoldHEN/" - "GoldHEN_Patch_Repository/main/%1") + fileUrl = QString("https://raw.githubusercontent.com/illusion0001/" + "PS4-PS5-Game-Patch/main/%1") .arg(filePath); } if (repository == "shadPS4") { @@ -725,7 +725,7 @@ void CheatsPatches::downloadPatches(const QString repository, const bool showMes QNetworkRequest fileRequest(fileUrl); QNetworkReply* fileReply = manager->get(fileRequest); - connect(fileReply, &QNetworkReply::finished, [=]() { + connect(fileReply, &QNetworkReply::finished, [=, this]() { if (fileReply->error() == QNetworkReply::NoError) { QByteArray fileData = fileReply->readAll(); QFile localFile(dir.filePath(fileName)); @@ -864,7 +864,7 @@ void CheatsPatches::addCheatsToLayout(const QJsonArray& modsArray, const QJsonAr rightLayout->addWidget(cheatCheckBox); m_cheatCheckBoxes.append(cheatCheckBox); connect(cheatCheckBox, &QCheckBox::toggled, - [=](bool checked) { applyCheat(modName, checked); }); + [this, modName](bool checked) { applyCheat(modName, checked); }); } else if (modType == "button") { QPushButton* cheatButton = new QPushButton(modName); cheatButton->adjustSize(); @@ -880,7 +880,8 @@ void CheatsPatches::addCheatsToLayout(const QJsonArray& modsArray, const QJsonAr buttonLayout->addStretch(); rightLayout->addLayout(buttonLayout); - connect(cheatButton, &QPushButton::clicked, [=]() { applyCheat(modName, true); }); + connect(cheatButton, &QPushButton::clicked, + [this, modName]() { applyCheat(modName, true); }); } } @@ -1093,7 +1094,7 @@ void CheatsPatches::addPatchesToLayout(const QString& filePath) { patchCheckBox->installEventFilter(this); connect(patchCheckBox, &QCheckBox::toggled, - [=](bool checked) { applyPatch(patchName, checked); }); + [this, patchName](bool checked) { applyPatch(patchName, checked); }); patchName.clear(); patchAuthor.clear(); diff --git a/src/qt_gui/translations/sq.ts b/src/qt_gui/translations/sq.ts new file mode 100644 index 000000000..cd1ee74fb --- /dev/null +++ b/src/qt_gui/translations/sq.ts @@ -0,0 +1,902 @@ + + + + AboutDialog + + + About shadPS4 + Rreth shadPS4 + + + + shadPS4 + shadPS4 + + + + shadPS4 is an experimental open-source emulator for the PlayStation 4. + shadPS4 është një emulator eksperimental me burim të hapur për PlayStation 4. + + + + This software should not be used to play games you have not legally obtained. + Ky program nuk duhet përdorur për të luajtur lojëra që nuk ke marrë ligjërisht. + + + + ElfViewer + + + Open Folder + Hap Dosjen + + + + GameInfoClass + + + Loading game list, please wait :3 + Duke ngarkuar listën e lojërave, të lutem prit :3 + + + + Cancel + Anulo + + + + Loading... + Duke ngarkuar... + + + + GameInstallDialog + + + shadPS4 - Choose directory + shadPS4 - Përzgjidh dosjen + + + + Directory to install games + Dosja ku do instalohen lojërat + + + + Browse + Shfleto + + + + Error + Gabim + + + + The value for location to install games is not valid. + Vlera për vendndodhjen e instalimit të lojërave nuk është e vlefshme. + + + + GuiContextMenus + + + Create Shortcut + Krijo Shkurtore + + + + Open Game Folder + Hap Dosjen e Lojës + + + + Cheats / Patches + Mashtrime / Arna + + + + SFO Viewer + Shikuesi i SFO + + + + Trophy Viewer + Shikuesi i Trofeve + + + + Copy info + Kopjo informacionin + + + + Copy Name + Kopjo Emrin + + + + Copy Serial + Kopjo Serikun + + + + Copy All + Kopjo të Gjitha + + + + Shortcut creation + Krijim i shkurtores + + + + Shortcut created successfully!\n %1 + Shkurtorja u krijua me sukses!\n %1 + + + + Error + Gabim + + + + Error creating shortcut!\n %1 + Gabim në krijimin e shkurtores!\n %1 + + + + Install PKG + Instalo PKG + + + + MainWindow + + + Open/Add Elf Folder + Hap/Shto Dosje ELF + + + + Install Packages (PKG) + Instalo Paketat (PKG) + + + + Boot Game + Nis Lojën + + + + About shadPS4 + Rreth shadPS4 + + + + Configure... + Formëso... + + + + Install application from a .pkg file + Instalo aplikacionin nga skedari .pkg + + + + Recent Games + Lojërat e fundit + + + + Exit + Dil + + + + Exit shadPS4 + Dil nga shadPS4 + + + + Exit the application. + Dil nga aplikacioni. + + + + Show Game List + Shfaq Listën e Lojërave + + + + Game List Refresh + Rifresko Listën e Lojërave + + + + Tiny + Të vockla + + + + Small + Të vogla + + + + Medium + Të mesme + + + + Large + Të mëdha + + + + List View + Pamja e Listës + + + + Grid View + Pamja e Rrjetës + + + + Elf Viewer + Shikuesi i Elf + + + + Game Install Directory + Dosja e Instalimit të Lojës + + + + Download Cheats/Patches + Shkarko Mashtrimet / Arnat + + + + Dump Game List + Zbraz Listën e lojërave + + + + PKG Viewer + Shikuesi i PKG + + + + Search... + Kërko... + + + + File + Skedar + + + + View + Pamje + + + + Game List Icons + Ikonat e Listës së Lojërave + + + + Game List Mode + Mënyra e Listës së Lojërave + + + + Settings + Cilësimet + + + + Utils + Shërbime + + + + Themes + Motivet + + + + About + Rreth + + + + Dark + E errët + + + + Light + E çelët + + + + Green + E gjelbër + + + + Blue + E kaltër + + + + Violet + Vjollcë + + + + toolBar + Shiriti i veglave + + + + PKGViewer + + + Open Folder + Hap Dosjen + + + + TrophyViewer + + + Trophy Viewer + Shikuesi i Trofeve + + + + SettingsDialog + + + Settings + Cilësimet + + + + General + Të përgjithshme + + + + System + Sistemi + + + + Console Language + Gjuha e Konsolës + + + + Emulator Language + Gjuha e emulatorit + + + + Emulator + Emulatori + + + + Enable Fullscreen + Aktivizo Ekranin e plotë + + + + Show Splash + Shfaq Pamjen e nisjes + + + + Is PS4 Pro + Mënyra PS4 Pro + + + + Username + Nofka + + + + Logger + Regjistruesi i të dhënave + + + + Log Type + Lloji i Ditarit + + + + Log Filter + Filtri i Ditarit + + + + Graphics + Grafika + + + + Graphics Device + Pajisja e Grafikës + + + + Width + Gjerësia + + + + Height + Lartësia + + + + Vblank Divider + Ndarës Vblank + + + + Advanced + Të përparuara + + + + Enable Shaders Dumping + Aktivizo Zbrazjen e Shaders-ave + + + + Enable NULL GPU + Aktivizo GPU-në NULL + + + + Enable PM4 Dumping + Aktivizo Zbrazjen PM4 + + + + Debug + Korrigjim + + + + Enable Debug Dumping + Aktivizo Zbrazjen për Korrigjim + + + + Enable Vulkan Validation Layers + Aktivizo Shtresat e Vlefshmërisë Vulkan + + + + Enable Vulkan Synchronization Validation + Aktivizo Vërtetimin e Sinkronizimit Vulkan + + + + Enable RenderDoc Debugging + Aktivizo Korrigjimin RenderDoc + + + + MainWindow + + + * Unsupported Vulkan Version + * Version i pambështetur i Vulkan + + + + Download Cheats For All Installed Games + Shkarko Mashtrime Për Të Gjitha Lojërat e Instaluara + + + + Download Patches For All Games + Shkarko Arna Për Të Gjitha Lojërat e Instaluara + + + + Download Complete + Shkarkimi Përfundoi + + + + You have downloaded cheats for all the games you have installed. + Ke shkarkuar mashtrimet për të gjitha lojërat që ke instaluar. + + + + Patches Downloaded Successfully! + Arnat u shkarkuan me sukses! + + + + All Patches available for all games have been downloaded. + Të gjitha arnat e ofruara për të gjitha lojërat janë shkarkuar. + + + + Games: + Lojërat: + + + + PKG File (*.PKG) + Skedar PKG (*.PKG) + + + + ELF files (*.bin *.elf *.oelf) + Skedarë ELF (*.bin *.elf *.oelf) + + + + Game Boot + Nis Lojën + + + + Only one file can be selected! + Mund të përzgjidhet vetëm një skedar! + + + + PKG Extraction + Nxjerrja e PKG-së + + + + Patch detected! + U zbulua një arnë! + + + + PKG and Game versions match: + PKG-ja dhe versioni i Lojës përputhen: + + + + Would you like to overwrite? + Dëshiron të mbishkruash? + + + + PKG Version %1 is older than installed version: + Versioni %1 i PKG-së është më i vjetër se versioni i instaluar: + + + + Game is installed: + Loja është instaluar: + + + + Would you like to install Patch: + Dëshiron të instalosh Arnën: + + + + DLC Installation + Instalimi i DLC-ve + + + + Would you like to install DLC: %1? + Dëshiron të instalosh DLC-në: %1? + + + + DLC already installed: + DLC-ja është instaluar tashmë: + + + + Game already installed + Loja është instaluar tashmë + + + + PKG is a patch, please install the game first! + PKG-ja është një arnë, të lutem instalo lojën fillimisht! + + + + PKG ERROR + GABIM PKG + + + + Extracting PKG %1/%2 + Po nxirret PKG-ja %1/%2 + + + + Extraction Finished + Nxjerrja Përfundoi + + + + Game successfully installed at %1 + Loja u instalua me sukses në %1 + + + + File doesn't appear to be a valid PKG file + Skedari nuk duket si skedar PKG i vlefshëm + + + + CheatsPatches + + + Cheats / Patches + Mashtrime / Arna + + + + defaultTextEdit_MSG + Mashtrimet/Arnat janë eksperimentale.\nPërdori me kujdes.\n\nShkarko mashtrimet individualisht duke zgjedhur depon dhe duke klikuar butonin e shkarkimit.\nNë skedën Arna, mund t'i shkarkosh të gjitha arnat menjëherë, të zgjidhësh cilat dëshiron të përdorësh dhe të ruash zgjedhjen tënde.\n\nMeqenëse ne nuk zhvillojmë Mashtrimet/Arnat,\ntë lutem raporto problemet te autori i mashtrimit.\n\nKe krijuar një mashtrim të ri? Vizito:\nhttps://github.com/shadps4-emu/ps4_cheats + + + + No Image Available + Nuk ofrohet asnjë imazh + + + + Serial: + Seriku: + + + + Version: + Versioni: + + + + Size: + Madhësia: + + + + Select Cheat File: + Përzgjidh Skedarin e Mashtrimit: + + + + Repository: + Depo: + + + + Download Cheats + Shkarko Mashtrimet + + + + Delete File + Fshi Skedarin + + + + No files selected. + Nuk u zgjodh asnjë skedar. + + + + You can delete the cheats you don't want after downloading them. + Mund t'i fshish mashtrimet që nuk dëshiron pasi t'i kesh shkarkuar. + + + + Do you want to delete the selected file?\n%1 + Dëshiron të fshish skedarin e përzgjedhur?\n%1 + + + + Select Patch File: + Përzgjidh Skedarin e Arnës: + + + + Download Patches + Shkarko Arnat + + + + Save + Ruaj + + + + Cheats + Mashtrime + + + + Patches + Arna + + + + Error + Gabim + + + + No patch selected. + Asnjë arnë e përzgjedhur. + + + + Unable to open files.json for reading. + files.json nuk mund të hapet për lexim. + + + + No patch file found for the current serial. + Nuk u gjet asnjë skedar patch për serikun aktual. + + + + Unable to open the file for reading. + Skedari nuk mund të hapet për lexim. + + + + Unable to open the file for writing. + Skedari nuk mund të hapet për shkrim. + + + + Failed to parse XML: + Analiza e XML-së dështoi: + + + + Success + Sukses + + + + Options saved successfully. + Rregullimet u ruajtën me sukses. + + + + Invalid Source + Burim i pavlefshëm + + + + The selected source is invalid. + Burimi i përzgjedhur është i pavlefshëm. + + + + File Exists + Skedari Ekziston + + + + File already exists. Do you want to replace it? + Skedari ekziston tashmë. Dëshiron ta zëvendësosh? + + + + Failed to save file: + Ruajtja e skedarit dështoi: + + + + Failed to download file: + Shkarkimi i skedarit dështoi: + + + + Cheats Not Found + Mashtrimet nuk u gjetën + + + + CheatsNotFound_MSG + Nuk u gjetën mashtrime për këtë lojë në këtë version të depove të përzgjedhura, provo një depo tjetër ose një version tjetër të lojës. + + + + Cheats Downloaded Successfully + Mashtrimet u shkarkuan me sukses + + + + CheatsDownloadedSuccessfully_MSG + Ke shkarkuar me sukses mashtrimet për këtë version të lojës nga depoja e përzgjedhur. Mund të provosh të shkarkosh nga një depo tjetër, nëse ofrohet do të jetë e mundur gjithashtu ta përdorësh duke përzgjedhur skedarin nga lista. + + + + Failed to save: + Ruajtja dështoi: + + + + Failed to download: + Shkarkimi dështoi: + + + + Download Complete + Shkarkimi përfundoi + + + + DownloadComplete_MSG + Arnat u shkarkuan me sukses! Të gjitha arnat e ofruara për të gjitha lojërat janë shkarkuar, nuk ka nevojë t'i shkarkosh ato individualisht për secilën lojë siç ndodh me Mashtrimet. + + + + Failed to parse JSON data from HTML. + Analiza e të dhënave JSON nga HTML dështoi. + + + + Failed to retrieve HTML page. + Gjetja e faqes HTML dështoi. + + + + Failed to open file: + Hapja e skedarit dështoi: + + + + XML ERROR: + GABIM XML: + + + + Failed to open files.json for writing + Hapja e files.json për shkrim dështoi + + + + Author: + Autori: + + + + Directory does not exist: + Dosja nuk ekziston: + + + + Failed to open files.json for reading. + Hapja e files.json për lexim dështoi. + + + + Name: + Emri: + + + diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 98eac0819..c681be97c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -210,7 +210,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { } switch (program.info.stage) { case Stage::Compute: { - const std::array workgroup_size{program.info.workgroup_size}; + const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); @@ -258,8 +258,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) { - EmitContext ctx{profile, program.info, binding}; +std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + const IR::Program& program, u32& binding) { + EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); if (program.info.stage == Stage::Vertex) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4c862185f..aada0ff67 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -9,7 +9,7 @@ namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const IR::Program& program, - u32& binding); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + const IR::Program& program, u32& binding); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 7bdc98de9..39a214fa0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -59,7 +59,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { case IR::Attribute::Position2: case IR::Attribute::Position3: { const u32 index = u32(attr) - u32(IR::Attribute::Position1); - return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]); + return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]); } case IR::Attribute::RenderTarget0: case IR::Attribute::RenderTarget1: diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 51315139f..b65cbdf46 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -41,9 +41,10 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace -EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_) - : Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage}, - binding{binding_} { +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + const Info& info_, u32& binding_) + : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, + profile{profile_}, stage{info.stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -168,7 +169,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f void EmitContext::DefineBufferOffsets() { for (auto& buffer : buffers) { const u32 binding = buffer.binding; - const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 comp = (binding & 0xf) >> 2; const u32 offset = (binding & 0x3) << 3; const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), @@ -179,7 +180,7 @@ void EmitContext::DefineBufferOffsets() { } for (auto& tex_buffer : texture_buffers) { const u32 binding = tex_buffer.binding; - const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 comp = (binding & 0xf) >> 2; const u32 offset = (binding & 0x3) << 3; const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), @@ -247,7 +248,7 @@ void EmitContext::DefineInputs() { frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); - for (const auto& input : info.ps_inputs) { + for (const auto& input : runtime_info.fs_info.inputs) { const u32 semantic = input.param_index; if (input.is_default && !input.is_flat) { input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1], @@ -554,7 +555,7 @@ void EmitContext::DefineSharedMemory() { if (!info.uses_shared) { return; } - u32 shared_memory_size = info.shared_memory_size; + u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; if (shared_memory_size == 0) { shared_memory_size = DefaultSharedMemSize; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index d3646382f..0908b7f82 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -6,9 +6,9 @@ #include #include +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" -#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::SPIRV { @@ -36,7 +36,8 @@ struct VectorIds { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding); + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info, + u32& binding); ~EmitContext(); Id Def(const IR::Value& value); @@ -125,6 +126,7 @@ public: } const Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; Stage stage{}; diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index fefc623fc..bf5ba6bce 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -602,13 +602,14 @@ public: Common::ObjectPool& block_pool_, Common::ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Info& info_, const Profile& profile_) + Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, + runtime_info{runtime_info_}, profile{profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - Translator{&first_block, info, profile}.EmitPrologue(); + Translator{&first_block, info, runtime_info, profile}.EmitPrologue(); } private: @@ -637,7 +638,7 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), - info, profile); + info, runtime_info, profile); } break; } @@ -817,19 +818,20 @@ private: const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, Common::ObjectPool& block_pool, CFG& cfg, Info& info, - const Profile& profile) { + const RuntimeInfo& runtime_info, const Profile& profile) { Common::ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, - syntax_list, cfg.inst_list, info, profile}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, + cfg.inst_list, info, runtime_info, profile}; ASSERT_MSG(!info.translation_failed, "Shader translation has failed"); return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index f5a540518..2119484e3 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -11,12 +11,14 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, Common::ObjectPool& block_pool, CFG& cfg, - Info& info, const Profile& profile); + Info& info, const RuntimeInfo& runtime_info, + const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index d80de002c..d4db09a64 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -19,12 +20,28 @@ void Translator::EmitExport(const GcnInst& inst) { IR::VectorReg(inst.src[3].code), }; + const auto swizzle = [&](u32 comp) { + if (!IR::IsMrt(attrib)) { + return comp; + } + const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); + switch (runtime_info.fs_info.mrt_swizzles[index]) { + case MrtSwizzle::Identity: + return comp; + case MrtSwizzle::Alt: + static constexpr std::array AltSwizzle = {2, 1, 0, 3}; + return AltSwizzle[comp]; + default: + UNREACHABLE(); + } + }; + const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, idx * 2); - ir.SetAttribute(attrib, g, idx * 2 + 1); + ir.SetAttribute(attrib, r, swizzle(idx * 2)); + ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1)); }; // Components are float16 packed into a VGPR @@ -45,7 +62,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, i); + ir.SetAttribute(attrib, comp, swizzle(i)); } } } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index eb86310b8..b33746c7b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" @@ -16,8 +17,9 @@ namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_) - : ir{*block_, block_->begin()}, info{info_}, profile{profile_} {} +Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_, + const Profile& profile_) + : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {} void Translator::EmitPrologue() { ir.Prologue(); @@ -25,7 +27,7 @@ void Translator::EmitPrologue() { // Initialize user data. IR::ScalarReg dst_sreg = IR::ScalarReg::S0; - for (u32 i = 0; i < info.num_user_data; i++) { + for (u32 i = 0; i < runtime_info.num_user_data; i++) { ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg)); ++dst_sreg; } @@ -36,15 +38,15 @@ void Translator::EmitPrologue() { // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 - if (info.num_input_vgprs > 0) { + if (runtime_info.num_input_vgprs > 0) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0)); } // v2: instance ID, step rate 1 - if (info.num_input_vgprs > 1) { + if (runtime_info.num_input_vgprs > 1) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1)); } // v3: instance ID, plain - if (info.num_input_vgprs > 2) { + if (runtime_info.num_input_vgprs > 2) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; @@ -64,13 +66,13 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); - if (info.tgid_enable[0]) { + if (runtime_info.cs_info.tgid_enable[0]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0)); } - if (info.tgid_enable[1]) { + if (runtime_info.cs_info.tgid_enable[1]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1)); } - if (info.tgid_enable[2]) { + if (runtime_info.cs_info.tgid_enable[2]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; @@ -445,7 +447,6 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { } void Translator::LogMissingOpcode(const GcnInst& inst) { - const u32 opcode = u32(inst.opcode); LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})", magic_enum::enum_name(inst.opcode), u32(inst.opcode), magic_enum::enum_name(inst.category)); @@ -453,11 +454,11 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { } void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info, - const Profile& profile) { + const RuntimeInfo& runtime_info, const Profile& profile) { if (inst_list.empty()) { return; } - Translator translator{block, info, profile}; + Translator translator{block, info, runtime_info, profile}; for (const auto& inst : inst_list) { pc += inst.length; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index f1619e810..0c1f3a587 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -5,9 +5,9 @@ #include #include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" -#include "shader_recompiler/runtime_info.h" namespace Shader { struct Info; @@ -55,7 +55,8 @@ enum class NegateMode : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Info& info, const Profile& profile); + explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info, + const Profile& profile); // Instruction categories void EmitPrologue(); @@ -237,12 +238,13 @@ private: private: IR::IREmitter ir; Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; IR::U32 m0_value; bool opcode_missing = false; }; void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info, - const Profile& profile); + const RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7fef91377..a07e70785 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -479,10 +479,11 @@ void Translator::V_ADD_F32(const GcnInst& inst) { void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg( - dst_reg, - ir.FPMul(ir.ConvertUToF(32, 32, ir.ISub(ir.BitwiseAnd(src0, ir.Imm32(0xF)), ir.Imm32(8))), - ir.Imm32(1.f / 16.f))); + ASSERT(src0.IsImmediate()); + static constexpr std::array IntToFloat = { + 0.0f, 0.0625f, 0.1250f, 0.1875f, 0.2500f, 0.3125f, 0.3750f, 0.4375f, + -0.5000f, -0.4375f, -0.3750f, -0.3125f, -0.2500f, -0.1875f, -0.1250f, -0.0625f}; + ir.SetVectorReg(dst_reg, ir.Imm32(IntToFloat[src0.U32() & 0xF])); } void Translator::V_MED3_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 4ff846cf8..c12ae8f57 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -7,14 +7,14 @@ namespace Shader::Gcn { void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h new file mode 100644 index 000000000..cdc17304c --- /dev/null +++ b/src/shader_recompiler/info.h @@ -0,0 +1,232 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/type.h" +#include "shader_recompiler/params.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader { + +static constexpr size_t NumUserDataRegs = 16; + +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + Buffer, +}; +constexpr u32 NUM_TEXTURE_TYPES = 7; + +struct Info; + +struct BufferResource { + u32 sgpr_base; + u32 dword_offset; + IR::Type used_types; + AmdGpu::Buffer inline_cbuf; + bool is_instance_data{}; + bool is_written{}; + + bool IsStorage(AmdGpu::Buffer buffer) const noexcept { + static constexpr size_t MaxUboSize = 65536; + return buffer.GetSize() > MaxUboSize || is_written; + } + + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; +}; +using BufferResourceList = boost::container::small_vector; + +struct TextureBufferResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::NumberFormat nfmt; + bool is_written{}; + + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; +}; +using TextureBufferResourceList = boost::container::small_vector; + +struct ImageResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::ImageType type; + AmdGpu::NumberFormat nfmt; + bool is_storage; + bool is_depth; + bool is_atomic{}; + + constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; +}; +using ImageResourceList = boost::container::small_vector; + +struct SamplerResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::Sampler inline_sampler{}; + u32 associated_image : 4; + u32 disable_aniso : 1; + + constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; +}; +using SamplerResourceList = boost::container::small_vector; + +struct PushData { + static constexpr size_t BufOffsetIndex = 2; + + u32 step0; + u32 step1; + std::array buf_offsets; + + void AddOffset(u32 binding, u32 offset) { + ASSERT(offset < 256 && binding < buf_offsets.size()); + buf_offsets[binding] = offset; + } +}; + +/** + * Contains general information generated by the shader recompiler for an input program. + */ +struct Info { + struct VsInput { + enum InstanceIdType : u8 { + None = 0, + OverStepRate0 = 1, + OverStepRate1 = 2, + Plain = 3, + }; + + AmdGpu::NumberFormat fmt; + u16 binding; + u16 num_components; + u8 sgpr_base; + u8 dword_offset; + InstanceIdType instance_step_rate; + s32 instance_data_buf; + }; + boost::container::static_vector vs_inputs{}; + + struct AttributeFlags { + bool Get(IR::Attribute attrib, u32 comp = 0) const { + return flags[Index(attrib)] & (1 << comp); + } + + bool GetAny(IR::Attribute attrib) const { + return flags[Index(attrib)]; + } + + void Set(IR::Attribute attrib, u32 comp = 0) { + flags[Index(attrib)] |= (1 << comp); + } + + u32 NumComponents(IR::Attribute attrib) const { + return 4; + } + + static size_t Index(IR::Attribute attrib) { + return static_cast(attrib); + } + + std::array flags; + }; + AttributeFlags loads{}; + AttributeFlags stores{}; + + s8 vertex_offset_sgpr = -1; + s8 instance_offset_sgpr = -1; + + BufferResourceList buffers; + TextureBufferResourceList texture_buffers; + ImageResourceList images; + SamplerResourceList samplers; + + std::span user_data; + Stage stage; + + u64 pgm_hash{}; + VAddr pgm_base; + bool has_storage_images{}; + bool has_image_buffers{}; + bool has_texel_buffers{}; + bool has_discard{}; + bool has_image_gather{}; + bool has_image_query{}; + bool uses_lane_id{}; + bool uses_group_quad{}; + bool uses_shared{}; + bool uses_fp16{}; + bool uses_step_rates{}; + bool translation_failed{}; // indicates that shader has unsupported instructions + + explicit Info(Stage stage_, ShaderParams params) + : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + user_data{params.user_data} {} + + template + T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { + T data; + const u32* base = user_data.data(); + if (ptr_index != IR::NumScalarRegs) { + std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + } + std::memcpy(&data, base + dword_offset, sizeof(T)); + return data; + } + + size_t NumBindings() const noexcept { + return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); + } + + [[nodiscard]] std::pair GetDrawOffsets() const noexcept { + u32 vertex_offset = 0; + u32 instance_offset = 0; + if (vertex_offset_sgpr != -1) { + vertex_offset = user_data[vertex_offset_sgpr]; + } + if (instance_offset_sgpr != -1) { + instance_offset = user_data[instance_offset_sgpr]; + } + return {vertex_offset, instance_offset}; + } +}; + +constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { + return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); +} + +constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept { + return info.ReadUd(sgpr_base, dword_offset); +} + +constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { + return info.ReadUd(sgpr_base, dword_offset); +} + +constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { + return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); +} + +} // namespace Shader + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::Stage stage, format_context& ctx) const { + constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; + return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); + } +}; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f446ac476..025bb98c8 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -4,11 +4,11 @@ #include #include #include "common/alignment.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" -#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -471,14 +471,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip // Read image sharp. const auto tsharp = TrackSharp(tsharp_handle); - const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); const auto inst_info = inst.Flags(); + auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); if (!image.Valid()) { LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.ReplaceUsesWith( - ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f))); - return; + image = AmdGpu::Image::Null(); } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_storage = IsImageStorageInstruction(inst); diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index f7abba641..84a1a2d40 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -5,9 +5,9 @@ #include #include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/runtime_info.h" namespace Shader::IR { diff --git a/src/shader_recompiler/params.h b/src/shader_recompiler/params.h new file mode 100644 index 000000000..0dce9a0f3 --- /dev/null +++ b/src/shader_recompiler/params.h @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader { + +/** + * Compilation parameters used to identify and locate a guest shader program. + */ +struct ShaderParams { + static constexpr u32 NumShaderUserData = 16; + + std::span user_data; + std::span code; + u64 hash; + + VAddr Base() const noexcept { + return reinterpret_cast(code.data()); + } +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index dfcf9ed1b..12dbc6c1b 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/recompiler.h" namespace Shader { @@ -27,29 +28,32 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -IR::Program TranslateProgram(Common::ObjectPool& inst_pool, - Common::ObjectPool& block_pool, std::span token, - Info& info, const Profile& profile) { +IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, + const RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; - ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); + ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); - Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); + Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size()); Gcn::GcnDecodeContext decoder; // Decode and save instructions IR::Program program{info}; - program.ins_list.reserve(token.size()); + program.ins_list.reserve(code.size()); while (!slice.atEnd()) { program.ins_list.emplace_back(decoder.decodeInstruction(slice)); } + // Clear any previous pooled data. + pools.ReleaseContents(); + // Create control flow graph Common::ObjectPool gcn_block_pool{64}; Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. - program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile); + program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, + program.info, runtime_info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); @@ -63,7 +67,6 @@ IR::Program TranslateProgram(Common::ObjectPool& inst_pool, Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); - LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program)); return program; } diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 3a2295189..f8acf6c9e 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -10,10 +10,24 @@ namespace Shader { struct Profile; +struct RuntimeInfo; -[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool& inst_pool, - Common::ObjectPool& block_pool, - std::span code, Info& info, - const Profile& profile); +struct Pools { + static constexpr u32 InstPoolSize = 8192; + static constexpr u32 BlockPoolSize = 32; + + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; + + explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {} + + void ReleaseContents() { + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + } +}; + +[[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, + const RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 77c57e947..37fd64bb1 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -3,20 +3,14 @@ #pragma once -#include -#include +#include #include + #include "common/assert.h" #include "common/types.h" -#include "shader_recompiler/ir/attribute.h" -#include "shader_recompiler/ir/reg.h" -#include "shader_recompiler/ir/type.h" -#include "video_core/amdgpu/resource.h" namespace Shader { -static constexpr size_t NumUserDataRegs = 16; - enum class Stage : u32 { Fragment, Vertex, @@ -29,21 +23,18 @@ enum class Stage : u32 { constexpr u32 MaxStageTypes = 6; [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { - return static_cast(static_cast(Stage::Vertex) + index); + return static_cast(index); } -enum class TextureType : u32 { - Color1D, - ColorArray1D, - Color2D, - ColorArray2D, - Color3D, - ColorCube, - Buffer, +enum class MrtSwizzle : u8 { + Identity = 0, + Alt = 1, + Reverse = 2, + ReverseAlt = 3, }; -constexpr u32 NUM_TEXTURE_TYPES = 7; +static constexpr u32 MaxColorBuffers = 8; -enum class VsOutput : u32 { +enum class VsOutput : u8 { None, PointSprite, EdgeFlag, @@ -70,211 +61,69 @@ enum class VsOutput : u32 { }; using VsOutputMap = std::array; -struct Info; +struct VertexRuntimeInfo { + boost::container::static_vector outputs; -struct BufferResource { - u32 sgpr_base; - u32 dword_offset; - IR::Type used_types; - AmdGpu::Buffer inline_cbuf; - bool is_instance_data{}; - bool is_written{}; - - bool IsStorage(AmdGpu::Buffer buffer) const noexcept { - static constexpr size_t MaxUboSize = 65536; - return buffer.GetSize() > MaxUboSize || is_written; - } - - constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using BufferResourceList = boost::container::small_vector; - -struct TextureBufferResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::NumberFormat nfmt; - bool is_written{}; - - constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using TextureBufferResourceList = boost::container::small_vector; - -struct ImageResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::ImageType type; - AmdGpu::NumberFormat nfmt; - bool is_storage; - bool is_depth; - bool is_atomic{}; - - constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; -}; -using ImageResourceList = boost::container::small_vector; - -struct SamplerResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::Sampler inline_sampler{}; - u32 associated_image : 4; - u32 disable_aniso : 1; - - constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; -}; -using SamplerResourceList = boost::container::small_vector; - -struct PushData { - static constexpr size_t BufOffsetIndex = 2; - - u32 step0; - u32 step1; - std::array buf_offsets; - - void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 256 && binding < buf_offsets.size()); - buf_offsets[binding] = offset; + bool operator==(const VertexRuntimeInfo& other) const noexcept { + return true; } }; -struct Info { - struct VsInput { - enum InstanceIdType : u8 { - None = 0, - OverStepRate0 = 1, - OverStepRate1 = 2, - Plain = 3, - }; - - AmdGpu::NumberFormat fmt; - u16 binding; - u16 num_components; - u8 sgpr_base; - u8 dword_offset; - InstanceIdType instance_step_rate; - s32 instance_data_buf; - }; - boost::container::static_vector vs_inputs{}; - +struct FragmentRuntimeInfo { struct PsInput { - u32 param_index; + u8 param_index; bool is_default; bool is_flat; - u32 default_value; + u8 default_value; + + auto operator<=>(const PsInput&) const noexcept = default; }; - boost::container::static_vector ps_inputs{}; + boost::container::static_vector inputs; + std::array mrt_swizzles; - struct AttributeFlags { - bool Get(IR::Attribute attrib, u32 comp = 0) const { - return flags[Index(attrib)] & (1 << comp); - } + bool operator==(const FragmentRuntimeInfo& other) const noexcept { + return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) && + std::ranges::equal(inputs, other.inputs); + } +}; - bool GetAny(IR::Attribute attrib) const { - return flags[Index(attrib)]; - } - - void Set(IR::Attribute attrib, u32 comp = 0) { - flags[Index(attrib)] |= (1 << comp); - } - - u32 NumComponents(IR::Attribute attrib) const { - return 4; - } - - static size_t Index(IR::Attribute attrib) { - return static_cast(attrib); - } - - std::array flags; - }; - AttributeFlags loads{}; - AttributeFlags stores{}; - boost::container::static_vector vs_outputs; - - s8 vertex_offset_sgpr = -1; - s8 instance_offset_sgpr = -1; - - BufferResourceList buffers; - TextureBufferResourceList texture_buffers; - ImageResourceList images; - SamplerResourceList samplers; - - std::array workgroup_size{}; +struct ComputeRuntimeInfo { + u32 shared_memory_size; + std::array workgroup_size; std::array tgid_enable; + bool operator==(const ComputeRuntimeInfo& other) const noexcept { + return workgroup_size == other.workgroup_size && tgid_enable == other.tgid_enable; + } +}; + +/** + * Stores information relevant to shader compilation sourced from liverpool registers. + * It may potentially differ with the same shader module so must be checked. + * It's also possible to store any other custom information that needs to be part of shader key. + */ +struct RuntimeInfo { + Stage stage; u32 num_user_data; u32 num_input_vgprs; - std::span user_data; - Stage stage; + VertexRuntimeInfo vs_info; + FragmentRuntimeInfo fs_info; + ComputeRuntimeInfo cs_info; - uintptr_t pgm_base{}; - u64 pgm_hash{}; - u32 shared_memory_size{}; - bool has_storage_images{}; - bool has_image_buffers{}; - bool has_texel_buffers{}; - bool has_discard{}; - bool has_image_gather{}; - bool has_image_query{}; - bool uses_lane_id{}; - bool uses_group_quad{}; - bool uses_shared{}; - bool uses_fp16{}; - bool uses_step_rates{}; - bool translation_failed{}; // indicates that shader has unsupported instructions + RuntimeInfo(Stage stage_) : stage{stage_} {} - template - T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { - T data; - const u32* base = user_data.data(); - if (ptr_index != IR::NumScalarRegs) { - std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + bool operator==(const RuntimeInfo& other) const noexcept { + switch (stage) { + case Stage::Fragment: + return fs_info == other.fs_info; + case Stage::Vertex: + return vs_info == other.vs_info; + case Stage::Compute: + return cs_info == other.cs_info; + default: + return true; } - std::memcpy(&data, base + dword_offset, sizeof(T)); - return data; - } - - size_t NumBindings() const noexcept { - return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); - } - - [[nodiscard]] std::pair GetDrawOffsets() const noexcept { - u32 vertex_offset = 0; - u32 instance_offset = 0; - if (vertex_offset_sgpr != -1) { - vertex_offset = user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = user_data[instance_offset_sgpr]; - } - return {vertex_offset, instance_offset}; } }; -constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { - return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept { - return info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - return info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { - return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); -} - } // namespace Shader - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - auto format(const Shader::Stage stage, format_context& ctx) const { - constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; - return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); - } -}; diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.h b/src/shader_recompiler/specialization.h similarity index 56% rename from src/video_core/renderer_vulkan/vk_shader_cache.h rename to src/shader_recompiler/specialization.h index 191e1b08c..3dd75dbd7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_cache.h +++ b/src/shader_recompiler/specialization.h @@ -4,18 +4,11 @@ #pragma once #include -#include -#include -#include "common/object_pool.h" -#include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/profile.h" -#include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/liverpool.h" -#include "video_core/renderer_vulkan/vk_common.h" -namespace Vulkan { +#include "common/types.h" +#include "shader_recompiler/info.h" -class Instance; +namespace Shader { struct BufferSpecialization { u16 stride : 14; @@ -25,43 +18,38 @@ struct BufferSpecialization { }; struct TextureBufferSpecialization { - bool is_integer; + bool is_integer = false; auto operator<=>(const TextureBufferSpecialization&) const = default; }; struct ImageSpecialization { - AmdGpu::ImageType type; - bool is_integer; + AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; + bool is_integer = false; auto operator<=>(const ImageSpecialization&) const = default; }; +/** + * Alongside runtime information, this structure also checks bound resources + * for compatibility. Can be used as a key for storing shader permutations. + * Is separate from runtime information, because resource layout can only be deduced + * after the first compilation of a module. + */ struct StageSpecialization { static constexpr size_t MaxStageResources = 32; const Shader::Info* info; + RuntimeInfo runtime_info; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; boost::container::small_vector images; u32 start_binding{}; - void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { - for (const auto& desc : desc_list) { - auto& spec = spec_list.emplace_back(); - const auto sharp = desc.GetSharp(*info); - if (!sharp) { - binding++; - continue; - } - bitset.set(binding++); - func(spec, desc, sharp); - } - } - - StageSpecialization(const Shader::Info& info_, u32 start_binding_) - : info{&info_}, start_binding{start_binding_} { + explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, + u32 start_binding_) + : info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} { u32 binding{}; ForEachSharp(binding, buffers, info->buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { @@ -79,10 +67,26 @@ struct StageSpecialization { }); } + void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { + for (const auto& desc : desc_list) { + auto& spec = spec_list.emplace_back(); + const auto sharp = desc.GetSharp(*info); + if (!sharp) { + binding++; + continue; + } + bitset.set(binding++); + func(spec, desc, sharp); + } + } + bool operator==(const StageSpecialization& other) const { if (start_binding != other.start_binding) { return false; } + if (runtime_info != other.runtime_info) { + return false; + } u32 binding{}; for (u32 i = 0; i < buffers.size(); i++) { if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { @@ -103,54 +107,4 @@ struct StageSpecialization { } }; -struct Program { - struct Module { - vk::ShaderModule module; - StageSpecialization spec; - }; - - Shader::Info info; - boost::container::small_vector modules; - - explicit Program(const Shader::Info& info_) : info{info_} {} -}; - -struct GuestProgram { - Shader::Stage stage; - std::span user_data; - std::span code; - u64 hash; - - explicit GuestProgram(const auto* pgm, Shader::Stage stage_) - : stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} { - const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm); - hash = bininfo->shader_hash; - } -}; - -class ShaderCache { -public: - explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool); - ~ShaderCache() = default; - - std::tuple GetProgram(const GuestProgram& pgm, - u32& binding); - -private: - void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, - std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, - u32& binding); - Program* CreateProgram(const GuestProgram& pgm, u32& binding); - -private: - const Instance& instance; - AmdGpu::Liverpool* liverpool; - Shader::Profile profile{}; - tsl::robin_map program_cache; - Common::ObjectPool inst_pool; - Common::ObjectPool block_pool; - Common::ObjectPool program_pool; -}; - -} // namespace Vulkan +} // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 2a595516d..cee30f755 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -20,6 +20,20 @@ static const char* acb_task_name{"ACB_TASK"}; std::array Liverpool::ConstantEngine::constants_heap; +static std::span NextPacket(std::span span, size_t offset) { + if (offset > span.size()) { + LOG_ERROR( + Lib_GnmDriver, + ": packet length exceeds remaining submission size. Packet dword count={}, remaining " + "submission dwords={}", + offset, span.size()); + // Return empty subspan so check for next packet bails out + return {}; + } + + return span.subspan(offset); +} + Liverpool::Liverpool() { process_thread = std::jthread{std::bind_front(&Liverpool::Process, this)}; } @@ -150,7 +164,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", static_cast(opcode), count); } - ccb = ccb.subspan(header->type3.NumWords() + 1); + ccb = NextPacket(ccb, header->type3.NumWords() + 1); } TracyFiberLeave; @@ -184,7 +198,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spantype3.NumWords(); @@ -333,7 +347,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true); rasterizer->ScopeMarkerEnd(); } @@ -349,7 +362,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true, draw_index_off->index_offset); rasterizer->ScopeMarkerEnd(); } @@ -362,7 +374,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(false); rasterizer->ScopeMarkerEnd(); } @@ -376,7 +387,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DrawIndirect(false, ib_address, offset, size); rasterizer->ScopeMarkerEnd(); } @@ -392,7 +402,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DrawIndexIndirect", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DrawIndirect(true, ib_address, offset, size); rasterizer->ScopeMarkerEnd(); } @@ -407,7 +416,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -423,7 +431,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DispatchIndirect", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchIndirect(ib_address, offset, size); rasterizer->ScopeMarkerEnd(); } @@ -525,7 +532,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(opcode), count); } - dcb = dcb.subspan(header->type3.NumWords() + 1); + dcb = NextPacket(dcb, header->type3.NumWords() + 1); break; } } @@ -588,7 +595,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -627,7 +633,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { static_cast(opcode), count); } - acb = acb.subspan(header->type3.NumWords() + 1); + acb = NextPacket(acb, header->type3.NumWords() + 1); } TracyFiberLeave; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 7f262e1f4..37720168a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -18,6 +18,7 @@ #include "common/polyfill_thread.h" #include "common/types.h" #include "common/unique_function.h" +#include "shader_recompiler/params.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -171,6 +172,15 @@ struct Liverpool { return bininfo; } + static constexpr Shader::ShaderParams GetParams(const auto& sh) { + auto* bininfo = GetBinaryInfo(sh); + return { + .user_data = sh.user_data, + .code = sh.Code(), + .hash = bininfo->shader_hash, + }; + } + union PsInputControl { u32 raw; BitField<0, 5, u32> input_offset; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index b85a3788b..1721c1aea 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -176,6 +176,18 @@ struct Image { u64 lod_hw_cnt_en : 1; u64 : 43; + static constexpr Image Null() { + Image image{}; + image.data_format = u64(DataFormat::Format8_8_8_8); + image.dst_sel_x = 4; + image.dst_sel_y = 5; + image.dst_sel_z = 6; + image.dst_sel_w = 7; + image.tiling_index = u64(TilingMode::Texture_MicroTiled); + image.type = u64(ImageType::Color2D); + return image; + } + bool Valid() const { return (type & 0x8u) != 0; } diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index 372b6f745..adcea000b 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -9,7 +9,10 @@ #include "video_core/renderer_vulkan/vk_platform.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnullability-completeness" #include +#pragma GCC diagnostic pop namespace VideoCore { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 71228786e..93e05085d 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -4,7 +4,7 @@ #include #include "common/alignment.h" #include "common/scope_exit.h" -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 358a00447..40a1124a6 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -600,6 +600,8 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, return is_vo_surface ? vk::Format::eB8G8R8A8Unorm : vk::Format::eB8G8R8A8Srgb; case vk::Format::eB8G8R8A8Srgb: return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb; + case vk::Format::eA2B10G10R10UnormPack32: + return vk::Format::eA2R10G10B10UnormPack32; default: break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index b12708088..f1c81b6e2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -11,7 +11,10 @@ #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/texture_cache/image.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnullability-completeness" #include +#pragma GCC diagnostic pop namespace Vulkan { @@ -65,8 +68,10 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_) : window{window_}, liverpool{liverpool_}, - instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, draw_scheduler{instance}, - present_scheduler{instance}, flip_scheduler{instance}, swapchain{instance, window}, + instance{window, Config::getGpuId(), Config::vkValidationEnabled(), + Config::vkCrashDiagnosticEnabled()}, + draw_scheduler{instance}, present_scheduler{instance}, flip_scheduler{instance}, + swapchain{instance, window}, rasterizer{std::make_unique(instance, draw_scheduler, liverpool)}, texture_cache{rasterizer->GetTextureCache()} { const u32 num_images = swapchain.GetImageCount(); @@ -354,7 +359,7 @@ Frame* RendererVulkan::GetRenderFrame() { { std::unique_lock lock{free_mutex}; free_cv.wait(lock, [this] { return !free_queue.empty(); }); - LOG_INFO(Render_Vulkan, "Got render frame, remaining {}", free_queue.size() - 1); + LOG_DEBUG(Render_Vulkan, "Got render frame, remaining {}", free_queue.size() - 1); // Take the frame from the queue frame = free_queue.front(); diff --git a/src/video_core/renderer_vulkan/vk_common.cpp b/src/video_core/renderer_vulkan/vk_common.cpp index e9265ea9c..0823fd23d 100644 --- a/src/video_core/renderer_vulkan/vk_common.cpp +++ b/src/video_core/renderer_vulkan/vk_common.cpp @@ -5,7 +5,10 @@ // Implement vma functions #define VMA_IMPLEMENTATION +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnullability-completeness" #include +#pragma GCC diagnostic pop // Store the dispatch loader here VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 7db6fb06d..3e048749f 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -3,6 +3,10 @@ #pragma once +#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER +#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 +#endif + // Include vulkan-hpp header #define VK_ENABLE_BETA_EXTENSIONS #define VK_NO_PROTOTYPES diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0132066c5..54eaf6532 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,7 @@ #pragma once #include -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" namespace VideoCore { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3e51e6529..c06ddd204 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -25,6 +25,7 @@ using Liverpool = AmdGpu::Liverpool; struct GraphicsPipelineKey { std::array stage_hashes; std::array color_formats; + std::array mrt_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 34727d27e..56ab229ce 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -14,7 +14,10 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_platform.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnullability-completeness" #include +#pragma GCC diagnostic pop namespace Vulkan { @@ -46,14 +49,15 @@ std::string GetReadableVersion(u32 version) { } // Anonymous namespace -Instance::Instance(bool enable_validation, bool dump_command_buffers) - : instance{CreateInstance(dl, Frontend::WindowSystemType::Headless, enable_validation, - dump_command_buffers)}, +Instance::Instance(bool enable_validation, bool enable_crash_diagnostic) + : instance{CreateInstance(Frontend::WindowSystemType::Headless, enable_validation, + enable_crash_diagnostic)}, physical_devices{instance->enumeratePhysicalDevices()} {} Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index, - bool enable_validation /*= false*/) - : instance{CreateInstance(dl, window.getWindowInfo().type, enable_validation, false)}, + bool enable_validation /*= false*/, bool enable_crash_diagnostic /*= false*/) + : instance{CreateInstance(window.getWindowInfo().type, enable_validation, + enable_crash_diagnostic)}, physical_devices{instance->enumeratePhysicalDevices()} { if (enable_validation) { debug_callback = CreateDebugCallback(*instance); @@ -118,11 +122,15 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index, // Check and log format support details. for (const auto& key : format_properties | std::views::keys) { const auto format = key; - if (!IsFormatSupported(format)) { + if (!IsImageFormatSupported(format)) { const auto alternative = GetAlternativeFormat(format); - if (IsFormatSupported(alternative)) { - LOG_WARNING(Render_Vulkan, "Format {} is not supported, falling back to {}", + if (IsImageFormatSupported(alternative)) { + LOG_WARNING(Render_Vulkan, + "Format {} is not supported for images, falling back to {}.", vk::to_string(format), vk::to_string(alternative)); + } else if (IsVertexFormatSupported(format)) { + LOG_WARNING(Render_Vulkan, "Format {} is only supported for vertex buffers.", + vk::to_string(format)); } else { LOG_ERROR(Render_Vulkan, "Format {} is not supported and no suitable alternative is supported.", @@ -221,13 +229,6 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); - if (Config::isMarkersEnabled()) { - const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); - if (has_sync2) { - has_nv_checkpoints = add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME); - } - } - #ifdef __APPLE__ // Required by Vulkan spec if supported. add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); @@ -479,7 +480,7 @@ void Instance::CollectToolingInfo() { } } -bool Instance::IsFormatSupported(const vk::Format format) const { +bool Instance::IsImageFormatSupported(const vk::Format format) const { if (format == vk::Format::eUndefined) [[unlikely]] { return true; } @@ -495,6 +496,20 @@ bool Instance::IsFormatSupported(const vk::Format format) const { return (it->second.optimalTilingFeatures & optimal_flags) == optimal_flags; } +bool Instance::IsVertexFormatSupported(const vk::Format format) const { + if (format == vk::Format::eUndefined) [[unlikely]] { + return true; + } + + const auto it = format_properties.find(format); + if (it == format_properties.end()) { + UNIMPLEMENTED_MSG("Properties of format {} have not been queried.", vk::to_string(format)); + } + + constexpr vk::FormatFeatureFlags optimal_flags = vk::FormatFeatureFlagBits::eVertexBuffer; + return (it->second.bufferFeatures & optimal_flags) == optimal_flags; +} + vk::Format Instance::GetAlternativeFormat(const vk::Format format) const { if (format == vk::Format::eB5G6R5UnormPack16) { return vk::Format::eR5G6B5UnormPack16; @@ -505,11 +520,11 @@ vk::Format Instance::GetAlternativeFormat(const vk::Format format) const { } vk::Format Instance::GetSupportedFormat(const vk::Format format) const { - if (IsFormatSupported(format)) [[likely]] { + if (IsImageFormatSupported(format)) [[likely]] { return format; } const vk::Format alternative = GetAlternativeFormat(format); - if (IsFormatSupported(alternative)) [[likely]] { + if (IsImageFormatSupported(alternative)) [[likely]] { return alternative; } return format; @@ -517,7 +532,7 @@ vk::Format Instance::GetSupportedFormat(const vk::Format format) const { vk::ComponentMapping Instance::GetSupportedComponentSwizzle(vk::Format format, vk::ComponentMapping swizzle) const { - if (IsFormatSupported(format)) [[likely]] { + if (IsImageFormatSupported(format)) [[likely]] { return swizzle; } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 5f985d4ae..ee36d23e6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -17,19 +17,13 @@ class WindowSDL; VK_DEFINE_HANDLE(VmaAllocator) -#ifdef __APPLE__ -#define VULKAN_LIBRARY_NAME "libMoltenVK.dylib" -#else -#define VULKAN_LIBRARY_NAME -#endif - namespace Vulkan { class Instance { public: - explicit Instance(bool validation = false, bool dump_command_buffers = false); + explicit Instance(bool validation = false, bool crash_diagnostic = false); explicit Instance(Frontend::WindowSDL& window, s32 physical_device_index, - bool enable_validation = false); + bool enable_validation = false, bool enable_crash_diagnostic = false); ~Instance(); /// Returns a formatted string for the driver version @@ -88,10 +82,6 @@ public: return profiler_context; } - bool HasNvCheckpoints() const { - return has_nv_checkpoints; - } - /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -233,14 +223,16 @@ private: void CollectDeviceParameters(); void CollectToolingInfo(); - /// Determines if a format is supported. - [[nodiscard]] bool IsFormatSupported(vk::Format format) const; + /// Determines if a format is supported for images. + [[nodiscard]] bool IsImageFormatSupported(vk::Format format) const; + + /// Determines if a format is supported for vertex buffers. + [[nodiscard]] bool IsVertexFormatSupported(vk::Format format) const; /// Gets a commonly available alternative for an unsupported pixel format. vk::Format GetAlternativeFormat(const vk::Format format) const; private: - vk::DynamicLoader dl{VULKAN_LIBRARY_NAME}; vk::UniqueInstance instance; vk::PhysicalDevice physical_device; vk::UniqueDevice device; @@ -274,7 +266,6 @@ private: bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; - bool has_nv_checkpoints{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f8de5ffeb..364c2b4f1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,21 +1,124 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "shader_recompiler/runtime_info.h" +#include + +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" extern std::unique_ptr renderer; namespace Vulkan { +using Shader::VsOutput; + +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + +void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, + const AmdGpu::Liverpool::VsOutputControl& ctl) { + const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { + if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || + w != VsOutput::None) { + info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + } + }; + // VS_OUT_MISC_VEC + add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, + ctl.use_vtx_edge_flag + ? VsOutput::EdgeFlag + : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), + ctl.use_vtx_kill_flag + ? VsOutput::KillFlag + : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), + ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); + // VS_OUT_CCDIST0 + add_output(ctl.IsClipDistEnabled(0) + ? VsOutput::ClipDist0 + : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), + ctl.IsClipDistEnabled(1) + ? VsOutput::ClipDist1 + : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), + ctl.IsClipDistEnabled(2) + ? VsOutput::ClipDist2 + : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), + ctl.IsClipDistEnabled(3) + ? VsOutput::ClipDist3 + : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); + // VS_OUT_CCDIST1 + add_output(ctl.IsClipDistEnabled(4) + ? VsOutput::ClipDist4 + : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), + ctl.IsClipDistEnabled(5) + ? VsOutput::ClipDist5 + : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), + ctl.IsClipDistEnabled(6) + ? VsOutput::ClipDist6 + : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), + ctl.IsClipDistEnabled(7) + ? VsOutput::ClipDist7 + : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); +} + +Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, const GraphicsPipelineKey& key, + const AmdGpu::Liverpool::Regs& regs) { + auto info = Shader::RuntimeInfo{stage}; + switch (stage) { + case Shader::Stage::Vertex: { + info.num_user_data = regs.vs_program.settings.num_user_regs; + info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + GatherVertexOutputs(info.vs_info, regs.vs_output_control); + break; + } + case Shader::Stage::Fragment: { + info.num_user_data = regs.ps_program.settings.num_user_regs; + std::ranges::transform(key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(), + [](Liverpool::ColorBuffer::SwapMode mode) { + return static_cast(mode); + }); + for (u32 i = 0; i < regs.num_interp; i++) { + info.fs_info.inputs.push_back({ + .param_index = u8(regs.ps_inputs[i].input_offset.Value()), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = u8(regs.ps_inputs[i].default_value), + }); + } + break; + } + case Shader::Stage::Compute: { + const auto& cs_pgm = regs.cs_program; + info.num_user_data = cs_pgm.settings.num_user_regs; + info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, + cs_pgm.num_thread_z.full}; + info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), + cs_pgm.IsTgidEnabled(2)}; + info.cs_info.shared_memory_size = cs_pgm.SharedMemSize(); + break; + } + default: + break; + } + return info; +} + PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, - shader_cache{std::make_unique(instance, liverpool)} { + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} { + profile = Shader::Profile{ + .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, + .subgroup_size = instance.SubgroupSize(), + .support_explicit_workgroup_layout = true, + }; pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } @@ -134,6 +237,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_formats.fill(vk::Format::eUndefined); key.blend_controls.fill({}); key.write_masks.fill({}); + key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); int remapped_cb{}; for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -142,9 +246,12 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto base_format = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf); + const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + if (base_format == key.color_formats[remapped_cb]) { + key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); + } key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); @@ -169,6 +276,7 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); if (!bininfo->Valid()) { + LOG_WARNING(Render_Vulkan, "Invalid binary info structure!"); key.stage_hashes[i] = 0; infos[i] = nullptr; continue; @@ -176,10 +284,9 @@ bool PipelineCache::RefreshGraphicsKey() { if (ShouldSkipShader(bininfo->shader_hash, "graphics")) { return false; } - const auto stage = Shader::Stage{i}; - const GuestProgram guest_pgm{pgm, stage}; - std::tie(infos[i], modules[i], key.stage_hashes[i]) = - shader_cache->GetProgram(guest_pgm, binding); + const auto stage = Shader::StageFromIndex(i); + const auto params = Liverpool::GetParams(*pgm); + std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } return true; } @@ -187,12 +294,80 @@ bool PipelineCache::RefreshGraphicsKey() { bool PipelineCache::RefreshComputeKey() { u32 binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; - const GuestProgram guest_pgm{cs_pgm, Shader::Stage::Compute}; - if (ShouldSkipShader(guest_pgm.hash, "compute")) { + const auto cs_params = Liverpool::GetParams(*cs_pgm); + if (ShouldSkipShader(cs_params.hash, "compute")) { return false; } - std::tie(infos[0], modules[0], compute_key) = shader_cache->GetProgram(guest_pgm, binding); + std::tie(infos[0], modules[0], compute_key) = + GetProgram(Shader::Stage::Compute, cs_params, binding); return true; } +vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, + const Shader::RuntimeInfo& runtime_info, + std::span code, size_t perm_idx, + u32& binding) { + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation)" : ""); + if (Config::dumpShaders()) { + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); + } + + const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); + const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); + if (Config::dumpShaders()) { + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + } + + const auto module = CompileSPV(spv, instance.GetDevice()); + const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); + Vulkan::SetObjectName(instance.GetDevice(), module, name); + return module; +} + +std::tuple PipelineCache::GetProgram( + Shader::Stage stage, Shader::ShaderParams params, u32& binding) { + const auto runtime_info = BuildRuntimeInfo(stage, graphics_key, liverpool->regs); + auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); + if (new_program) { + Program* program = program_pool.Create(stage, params); + u32 start_binding = binding; + const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding); + program->AddPermut(module, std::move(spec)); + it_pgm.value() = program; + return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); + } + + Program* program = it_pgm->second; + const auto& info = program->info; + const auto spec = Shader::StageSpecialization(info, runtime_info, binding); + size_t perm_idx = program->modules.size(); + vk::ShaderModule module{}; + + const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); + if (it == program->modules.end()) { + auto new_info = Shader::Info(stage, params); + module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + program->AddPermut(module, std::move(spec)); + } else { + binding += info.NumBindings(); + module = it->module; + perm_idx = std::distance(program->modules.begin(), it); + } + return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); +} + +void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, + size_t perm_idx, std::string_view ext) { + using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteSpan(code); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 40853b746..26130994c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -4,6 +4,9 @@ #pragma once #include +#include "shader_recompiler/profile.h" +#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/specialization.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -17,6 +20,22 @@ class Instance; class Scheduler; class ShaderCache; +struct Program { + struct Module { + vk::ShaderModule module; + Shader::StageSpecialization spec; + }; + + Shader::Info info; + boost::container::small_vector modules; + + explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + + void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { + modules.emplace_back(module, std::move(spec)); + } +}; + class PipelineCache { static constexpr size_t MaxShaderStages = 5; @@ -29,17 +48,29 @@ public: const ComputePipeline* GetComputePipeline(); + std::tuple GetProgram(Shader::Stage stage, + Shader::ShaderParams params, + u32& binding); + private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); + void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, + std::string_view ext); + vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + std::span code, size_t perm_idx, u32& binding); + private: const Instance& instance; Scheduler& scheduler; AmdGpu::Liverpool* liverpool; vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; - std::unique_ptr shader_cache; + Shader::Profile profile{}; + Shader::Pools pools; + tsl::robin_map program_cache; + Common::ObjectPool program_pool; tsl::robin_map> compute_pipelines; tsl::robin_map> graphics_pipelines; std::array infos{}; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index c73a8139d..2318bb247 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -17,13 +17,23 @@ #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" +#include "common/path_util.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/vk_platform.h" +#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL +static vk::DynamicLoader dl; +#else +extern "C" { +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, + const char* pName); +} +#endif + namespace Vulkan { static const char* const VALIDATION_LAYER_NAME = "VK_LAYER_KHRONOS_validation"; -static const char* const API_DUMP_LAYER_NAME = "VK_LAYER_LUNARG_api_dump"; +static const char* const CRASH_DIAGNOSTIC_LAYER_NAME = "VK_LAYER_LUNARG_crash_diagnostic"; static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, @@ -186,12 +196,14 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window return extensions; } -vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemType window_type, - bool enable_validation, bool dump_command_buffers) { +vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool enable_validation, + bool enable_crash_diagnostic) { LOG_INFO(Render_Vulkan, "Creating vulkan instance"); +#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL auto vkGetInstanceProcAddr = dl.getProcAddress("vkGetInstanceProcAddr"); +#endif VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); const u32 available_version = VULKAN_HPP_DEFAULT_DISPATCHER.vkEnumerateInstanceVersion @@ -216,12 +228,27 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT u32 num_layers = 0; std::array layers; + vk::Bool32 enable_force_barriers = vk::False; + const char* log_path{}; + +#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL if (enable_validation) { layers[num_layers++] = VALIDATION_LAYER_NAME; } - if (dump_command_buffers) { - layers[num_layers++] = API_DUMP_LAYER_NAME; + + if (enable_crash_diagnostic) { + layers[num_layers++] = CRASH_DIAGNOSTIC_LAYER_NAME; + static const auto crash_diagnostic_path = + Common::FS::GetUserPathString(Common::FS::PathType::LogDir); + log_path = crash_diagnostic_path.c_str(); + enable_force_barriers = vk::True; } +#else + if (enable_validation || enable_crash_diagnostic) { + LOG_WARNING(Render_Vulkan, + "Skipping loading Vulkan layers as dynamic loading is not enabled."); + } +#endif vk::Bool32 enable_sync = enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; @@ -240,7 +267,7 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT }, vk::LayerSettingEXT{ .pLayerName = VALIDATION_LAYER_NAME, - .pSettingName = "sync_queue_submit", + .pSettingName = "syncval_submit_time_validation", .type = vk::LayerSettingTypeEXT::eBool32, .valueCount = 1, .pValues = &enable_sync, @@ -280,6 +307,20 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT .valueCount = 1, .pValues = &enable_gpuav, }, + vk::LayerSettingEXT{ + .pLayerName = "lunarg_crash_diagnostic", + .pSettingName = "output_path", + .type = vk::LayerSettingTypeEXT::eString, + .valueCount = 1, + .pValues = &log_path, + }, + vk::LayerSettingEXT{ + .pLayerName = "lunarg_crash_diagnostic", + .pSettingName = "sync_after_commands", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_force_barriers, + }, }; vk::StructureChain instance_ci_chain = { diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h index 582de3831..e38bd2fef 100644 --- a/src/video_core/renderer_vulkan/vk_platform.h +++ b/src/video_core/renderer_vulkan/vk_platform.h @@ -21,8 +21,8 @@ constexpr u32 TargetVulkanApiVersion = VK_API_VERSION_1_2; vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::WindowSDL& emu_window); -vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemType window_type, - bool enable_validation, bool dump_command_buffers); +vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool enable_validation, + bool enable_crash_diagnostic); vk::UniqueDebugUtilsMessengerEXT CreateDebugCallback(vk::Instance instance); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index cadce01eb..5a20899db 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -321,7 +321,7 @@ void Rasterizer::UpdateDepthStencilState() { } void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { - if (Config::nullGpu() || !Config::isMarkersEnabled()) { + if (Config::nullGpu() || !Config::vkMarkersEnabled()) { return; } @@ -332,7 +332,7 @@ void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { } void Rasterizer::ScopeMarkerEnd() { - if (Config::nullGpu() || !Config::isMarkersEnabled()) { + if (Config::nullGpu() || !Config::vkMarkersEnabled()) { return; } @@ -341,7 +341,7 @@ void Rasterizer::ScopeMarkerEnd() { } void Rasterizer::ScopedMarkerInsert(const std::string_view& str) { - if (Config::nullGpu() || !Config::isMarkersEnabled()) { + if (Config::nullGpu() || !Config::vkMarkersEnabled()) { return; } @@ -351,11 +351,4 @@ void Rasterizer::ScopedMarkerInsert(const std::string_view& str) { }); } -void Rasterizer::Breadcrumb(u64 id) { - if (Config::nullGpu() || !instance.HasNvCheckpoints()) { - return; - } - scheduler.CommandBuffer().setCheckpointNV(id); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c38fe6ee9..43ab4756d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -40,7 +40,6 @@ public: void ScopeMarkerBegin(const std::string_view& str); void ScopeMarkerEnd(); void ScopedMarkerInsert(const std::string_view& str); - void Breadcrumb(u64 id); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 2f1f13d72..9ff332aef 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -192,13 +192,6 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { try { instance.GetGraphicsQueue().submit(submit_info, info.fence); } catch (vk::DeviceLostError& err) { - if (instance.HasNvCheckpoints()) { - const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV(); - for (const auto& cp : checkpoint_data) { - LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage), - reinterpret_cast(cp.pCheckpointMarker)); - } - } UNREACHABLE_MSG("Device lost during submit: {}", err.what()); } diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.cpp b/src/video_core/renderer_vulkan/vk_shader_cache.cpp deleted file mode 100644 index 9250f84ce..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_cache.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/config.h" -#include "common/io_file.h" -#include "common/path_util.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/recompiler.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_platform.h" -#include "video_core/renderer_vulkan/vk_shader_cache.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" - -namespace Vulkan { - -using Shader::VsOutput; - -void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { - const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { - if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || - w != VsOutput::None) { - info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); - } - }; - // VS_OUT_MISC_VEC - add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, - ctl.use_vtx_edge_flag - ? VsOutput::EdgeFlag - : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), - ctl.use_vtx_kill_flag - ? VsOutput::KillFlag - : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), - ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); - // VS_OUT_CCDIST0 - add_output(ctl.IsClipDistEnabled(0) - ? VsOutput::ClipDist0 - : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), - ctl.IsClipDistEnabled(1) - ? VsOutput::ClipDist1 - : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), - ctl.IsClipDistEnabled(2) - ? VsOutput::ClipDist2 - : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), - ctl.IsClipDistEnabled(3) - ? VsOutput::ClipDist3 - : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); - // VS_OUT_CCDIST1 - add_output(ctl.IsClipDistEnabled(4) - ? VsOutput::ClipDist4 - : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), - ctl.IsClipDistEnabled(5) - ? VsOutput::ClipDist5 - : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), - ctl.IsClipDistEnabled(6) - ? VsOutput::ClipDist6 - : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), - ctl.IsClipDistEnabled(7) - ? VsOutput::ClipDist7 - : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); -} - -Shader::Info MakeShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{}; - info.user_data = pgm.user_data; - info.pgm_base = VAddr(pgm.code.data()); - info.pgm_hash = pgm.hash; - info.stage = pgm.stage; - switch (pgm.stage) { - case Shader::Stage::Vertex: { - info.num_user_data = regs.vs_program.settings.num_user_regs; - info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; - BuildVsOutputs(info, regs.vs_output_control); - break; - } - case Shader::Stage::Fragment: { - info.num_user_data = regs.ps_program.settings.num_user_regs; - for (u32 i = 0; i < regs.num_interp; i++) { - info.ps_inputs.push_back({ - .param_index = regs.ps_inputs[i].input_offset.Value(), - .is_default = bool(regs.ps_inputs[i].use_default), - .is_flat = bool(regs.ps_inputs[i].flat_shade), - .default_value = regs.ps_inputs[i].default_value, - }); - } - break; - } - case Shader::Stage::Compute: { - const auto& cs_pgm = regs.cs_program; - info.num_user_data = cs_pgm.settings.num_user_regs; - info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, - cs_pgm.num_thread_z.full}; - info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), - cs_pgm.IsTgidEnabled(2)}; - info.shared_memory_size = cs_pgm.SharedMemSize(); - break; - } - default: - break; - } - return info; -} - -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); -} - -ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - profile = Shader::Profile{ - .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, - .subgroup_size = instance.SubgroupSize(), - .support_explicit_workgroup_layout = true, - }; -} - -vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span code, - size_t perm_idx, u32& binding) { - LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, - perm_idx != 0 ? "(permutation)" : ""); - - if (Config::dumpShaders()) { - DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - } - - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); - - // Compile IR to SPIR-V - const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); - if (Config::dumpShaders()) { - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); - } - - // Create module and set name to hash in renderdoc - const auto module = CompileSPV(spv, instance.GetDevice()); - ASSERT(module != VK_NULL_HANDLE); - const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); - Vulkan::SetObjectName(instance.GetDevice(), module, name); - return module; -} - -Program* ShaderCache::CreateProgram(const GuestProgram& pgm, u32& binding) { - Program* program = program_pool.Create(MakeShaderInfo(pgm, liverpool->regs)); - u32 start_binding = binding; - const auto module = CompileModule(program->info, pgm.code, 0, binding); - program->modules.emplace_back(module, StageSpecialization{program->info, start_binding}); - return program; -} - -std::tuple ShaderCache::GetProgram( - const GuestProgram& pgm, u32& binding) { - auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash); - if (new_program) { - auto program = CreateProgram(pgm, binding); - const auto module = program->modules.back().module; - it_pgm.value() = program; - return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0)); - } - - Program* program = it_pgm->second; - const auto& info = program->info; - size_t perm_idx = program->modules.size(); - StageSpecialization spec{info, binding}; - vk::ShaderModule module{}; - - const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); - if (it == program->modules.end()) { - auto new_info = MakeShaderInfo(pgm, liverpool->regs); - module = CompileModule(new_info, pgm.code, perm_idx, binding); - program->modules.emplace_back(module, std::move(spec)); - } else { - binding += info.NumBindings(); - module = it->module; - perm_idx = std::distance(program->modules.begin(), it); - } - return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx)); -} - -void ShaderCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, - size_t perm_idx, std::string_view ext) { - using namespace Common::FS; - const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; - if (!std::filesystem::exists(dump_dir)) { - std::filesystem::create_directories(dump_dir); - } - const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); - const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteSpan(code); -} - -} // namespace Vulkan diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 0b725655b..0d20eaeab 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,19 +2,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" -#include "common/config.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/image.h" -#include "video_core/texture_cache/tile_manager.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnullability-completeness" #include +#pragma GCC diagnostic pop namespace VideoCore { using namespace Vulkan; -using Libraries::VideoOut::TilingMode; bool ImageInfo::IsBlockCoded() const { switch (pixel_format) { diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 8b0227624..5f3ed0f89 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -15,7 +15,10 @@ #include #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnullability-completeness" #include +#pragma GCC diagnostic pop namespace VideoCore {