mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-29 23:03:18 +00:00
GnmDriver: Clear context support (#567)
* gnmdriver: added support for gpu context reset * shader_recompiler: minor validation fixes * shader_recompiler: added `V_CMPX_GT_I32` * shader_recompiler: fix for crash on inline sampler access * compilation warnings and dead code elimination * amdgpu: fix for registers addressing * libraries: videoout: reduce logging pressure * shader_recompiler: fix for divergence scope detection
This commit is contained in:
parent
c2ddfe51e1
commit
b687ae5e34
19 changed files with 147 additions and 50 deletions
55
src/video_core/amdgpu/default_context.cpp
Normal file
55
src/video_core/amdgpu/default_context.cpp
Normal file
|
@ -0,0 +1,55 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
// The following values are taken from fpPS4:
|
||||
// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410
|
||||
//
|
||||
// Default values for a contiguous run of 85 context registers. SetDefaults() copies this
// table into the register file starting at word `ContextRegWordOffset + 0x80`, so element
// `i` is the default for context register `0x80 + i`. The trailing comment on each row
// gives the context-register offsets covered by that row.
static constexpr std::array reg_array_default{
    0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u, // 0x080-0x086
    0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u, // 0x087-0x08d
    0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u, // 0x08e-0x094
    0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, // 0x095-0x09b
    0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, // 0x09c-0x0a2
    0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, // 0x0a3-0x0a9
    0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, // 0x0aa-0x0b0
    0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, // 0x0b1-0x0b7
    0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, // 0x0b8-0x0be
    0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, // 0x0bf-0x0c5
    0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, // 0x0c6-0x0cc
    0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, // 0x0cd-0x0d3
    0x2a00161au,                                                                               // 0x0d4
};
|
||||
|
||||
// Resets the whole register file to its default state.
//
// The reset happens in three steps, and the order matters because later steps overwrite
// earlier ones:
//   1. every register word is cleared to zero;
//   2. the `reg_array_default` table is copied over the context registers starting at
//      offset 0x80;
//   3. a handful of individual context registers outside that table get their own
//      non-zero defaults.
void Liverpool::Regs::SetDefaults() {
    std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32));

    // Bulk-load the contiguous block of defaults; the destination is the address of the
    // context register at offset 0x80.
    std::memcpy(&reg_array[ContextRegWordOffset + 0x80], reg_array_default.data(),
                reg_array_default.size() * sizeof(u32));

    // Individual context regs values
    reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u;
    reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u;
    reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u;
    reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u;
    reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u;
    reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u;
    reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u;
    reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u;
    reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u;
    reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u;
    reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u;
    reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u;
    reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u;
    reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u;
    reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu;
    reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u;
}
|
||||
|
||||
} // namespace AmdGpu
|
|
@ -216,6 +216,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
break;
|
||||
}
|
||||
case PM4ItOpcode::ClearState: {
|
||||
regs.SetDefaults();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetConfigReg: {
|
||||
|
|
|
@ -1017,6 +1017,8 @@ struct Liverpool {
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void SetDefaults();
|
||||
};
|
||||
|
||||
Regs regs{};
|
||||
|
|
|
@ -38,9 +38,9 @@ struct UniqueBuffer {
|
|||
UniqueBuffer& operator=(const UniqueBuffer&) = delete;
|
||||
|
||||
UniqueBuffer(UniqueBuffer&& other)
|
||||
: buffer{std::exchange(other.buffer, VK_NULL_HANDLE)},
|
||||
allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
|
||||
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
|
||||
buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {}
|
||||
UniqueBuffer& operator=(UniqueBuffer&& other) {
|
||||
buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
|
||||
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
||||
|
|
|
@ -274,6 +274,7 @@ bool Instance::CreateDevice() {
|
|||
.independentBlend = features.independentBlend,
|
||||
.geometryShader = features.geometryShader,
|
||||
.logicOp = features.logicOp,
|
||||
.depthBiasClamp = features.depthBiasClamp,
|
||||
.multiViewport = features.multiViewport,
|
||||
.samplerAnisotropy = features.samplerAnisotropy,
|
||||
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
|
||||
|
|
|
@ -138,8 +138,8 @@ void Rasterizer::BeginRendering() {
|
|||
using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat;
|
||||
if (regs.depth_buffer.Address() != 0 &&
|
||||
((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) ||
|
||||
regs.depth_control.stencil_enable &&
|
||||
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid)) {
|
||||
(regs.depth_control.stencil_enable &&
|
||||
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) {
|
||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
||||
texture_cache.IsMetaCleared(htile_address);
|
||||
|
|
|
@ -43,9 +43,9 @@ struct UniqueImage {
|
|||
UniqueImage& operator=(const UniqueImage&) = delete;
|
||||
|
||||
UniqueImage(UniqueImage&& other)
|
||||
: image{std::exchange(other.image, VK_NULL_HANDLE)},
|
||||
allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
|
||||
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
|
||||
image{std::exchange(other.image, VK_NULL_HANDLE)} {}
|
||||
UniqueImage& operator=(UniqueImage&& other) {
|
||||
image = std::exchange(other.image, VK_NULL_HANDLE);
|
||||
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
|
@ -110,7 +111,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
|||
|
||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
||||
ImageId image_id_, std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
||||
: info{info_}, image_id{image_id_} {
|
||||
: image_id{image_id_}, info{info_} {
|
||||
vk::ImageViewUsageCreateInfo usage_ci{};
|
||||
if (usage_override) {
|
||||
usage_ci.usage = usage_override.value();
|
||||
|
|
|
@ -342,12 +342,6 @@ TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*
|
|||
.usage = usage,
|
||||
};
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Fix for detiler artifacts on macOS
|
||||
const bool is_large_buffer = true;
|
||||
#else
|
||||
const bool is_large_buffer = size > 128_MB;
|
||||
#endif
|
||||
VmaAllocationCreateInfo alloc_info{
|
||||
.flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
|
||||
|
@ -462,7 +456,6 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
|
|||
(m > 0 ? params.sizes[m - 1] : 0);
|
||||
}
|
||||
|
||||
auto pitch = image.info.pitch;
|
||||
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params),
|
||||
¶ms);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue