mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-12 04:35:56 +00:00
Merge branch 'main' into avplayer-improvements
This commit is contained in:
commit
a16ecdc6b4
20 changed files with 439 additions and 70 deletions
|
@ -952,6 +952,10 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
||||||
src/video_core/renderer_vulkan/host_passes/fsr_pass.h
|
src/video_core/renderer_vulkan/host_passes/fsr_pass.h
|
||||||
src/video_core/renderer_vulkan/host_passes/pp_pass.cpp
|
src/video_core/renderer_vulkan/host_passes/pp_pass.cpp
|
||||||
src/video_core/renderer_vulkan/host_passes/pp_pass.h
|
src/video_core/renderer_vulkan/host_passes/pp_pass.h
|
||||||
|
src/video_core/texture_cache/blit_helper.cpp
|
||||||
|
src/video_core/texture_cache/blit_helper.h
|
||||||
|
src/video_core/texture_cache/host_compatibility.cpp
|
||||||
|
src/video_core/texture_cache/host_compatibility.h
|
||||||
src/video_core/texture_cache/image.cpp
|
src/video_core/texture_cache/image.cpp
|
||||||
src/video_core/texture_cache/image.h
|
src/video_core/texture_cache/image.h
|
||||||
src/video_core/texture_cache/image_info.cpp
|
src/video_core/texture_cache/image_info.cpp
|
||||||
|
@ -965,8 +969,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
||||||
src/video_core/texture_cache/tile_manager.cpp
|
src/video_core/texture_cache/tile_manager.cpp
|
||||||
src/video_core/texture_cache/tile_manager.h
|
src/video_core/texture_cache/tile_manager.h
|
||||||
src/video_core/texture_cache/types.h
|
src/video_core/texture_cache/types.h
|
||||||
src/video_core/texture_cache/host_compatibility.cpp
|
|
||||||
src/video_core/texture_cache/host_compatibility.h
|
|
||||||
src/video_core/page_manager.cpp
|
src/video_core/page_manager.cpp
|
||||||
src/video_core/page_manager.h
|
src/video_core/page_manager.h
|
||||||
src/video_core/multi_level_page_table.h
|
src/video_core/multi_level_page_table.h
|
||||||
|
|
|
@ -19,8 +19,7 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
|
||||||
const Id shift_id{ctx.ConstU32(2U)};
|
const Id shift_id{ctx.ConstU32(2U)};
|
||||||
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
const Id pointer{
|
const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||||
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
|
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
||||||
|
@ -32,8 +31,7 @@ Id SharedAtomicU32IncDec(EmitContext& ctx, Id offset,
|
||||||
const Id shift_id{ctx.ConstU32(2U)};
|
const Id shift_id{ctx.ConstU32(2U)};
|
||||||
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
const Id pointer{
|
const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||||
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
|
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
|
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
|
||||||
|
@ -45,8 +43,7 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
|
||||||
const Id shift_id{ctx.ConstU32(3U)};
|
const Id shift_id{ctx.ConstU32(3U)};
|
||||||
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||||
const Id pointer{
|
const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)};
|
||||||
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
|
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
|
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
|
||||||
|
|
|
@ -14,8 +14,7 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
|
||||||
|
|
||||||
return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
const Id pointer =
|
const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u16, ctx.shared_memory_u16, index);
|
||||||
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
|
|
||||||
return ctx.OpLoad(ctx.U16, pointer);
|
return ctx.OpLoad(ctx.U16, pointer);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -26,8 +25,7 @@ Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
|
|
||||||
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
const Id pointer =
|
const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index);
|
||||||
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
|
|
||||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -38,8 +36,7 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||||
|
|
||||||
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
const Id pointer{
|
const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index);
|
||||||
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
|
|
||||||
return ctx.OpLoad(ctx.U64, pointer);
|
return ctx.OpLoad(ctx.U64, pointer);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -50,8 +47,7 @@ void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
|
||||||
|
|
||||||
AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
const Id pointer =
|
const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u16, ctx.shared_memory_u16, index);
|
||||||
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
|
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
return Id{0};
|
return Id{0};
|
||||||
});
|
});
|
||||||
|
@ -63,8 +59,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
|
|
||||||
AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
const Id pointer =
|
const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index);
|
||||||
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
|
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
return Id{0};
|
return Id{0};
|
||||||
});
|
});
|
||||||
|
@ -76,8 +71,7 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||||
|
|
||||||
AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
const Id pointer{
|
const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index);
|
||||||
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
|
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
return Id{0};
|
return Id{0};
|
||||||
});
|
});
|
||||||
|
|
|
@ -972,7 +972,12 @@ void EmitContext::DefineImagesAndSamplers() {
|
||||||
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
|
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
|
||||||
Decorate(id, spv::Decoration::Binding, binding.unified++);
|
Decorate(id, spv::Decoration::Binding, binding.unified++);
|
||||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||||
Name(id, fmt::format("{}_{}{}", stage, "samp", samp_desc.sharp_idx));
|
auto sharp_desc = std::holds_alternative<u32>(samp_desc.sampler)
|
||||||
|
? fmt::format("sgpr:{}", std::get<u32>(samp_desc.sampler))
|
||||||
|
: fmt::format("inline:{:#x}:{:#x}",
|
||||||
|
std::get<AmdGpu::Sampler>(samp_desc.sampler).raw0,
|
||||||
|
std::get<AmdGpu::Sampler>(samp_desc.sampler).raw1);
|
||||||
|
Name(id, fmt::format("{}_{}{}", stage, "samp", sharp_desc));
|
||||||
samplers.push_back(id);
|
samplers.push_back(id);
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
|
@ -995,19 +1000,26 @@ void EmitContext::DefineSharedMemory() {
|
||||||
|
|
||||||
const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
|
const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
|
||||||
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
||||||
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
|
|
||||||
|
|
||||||
|
const auto mem_type = [&] {
|
||||||
|
if (num_types > 1) {
|
||||||
const Id struct_type{TypeStruct(array_type)};
|
const Id struct_type{TypeStruct(array_type)};
|
||||||
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
|
MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
|
||||||
|
return struct_type;
|
||||||
|
} else {
|
||||||
|
return array_type;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
|
const Id pointer = TypePointer(spv::StorageClass::Workgroup, mem_type);
|
||||||
const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
|
const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
|
||||||
const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
|
const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
|
||||||
Name(variable, name);
|
Name(variable, name);
|
||||||
interfaces.push_back(variable);
|
interfaces.push_back(variable);
|
||||||
|
|
||||||
if (num_types > 1) {
|
if (num_types > 1) {
|
||||||
Decorate(struct_type, spv::Decoration::Block);
|
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
|
||||||
Decorate(variable, spv::Decoration::Aliased);
|
Decorate(variable, spv::Decoration::Aliased);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -203,6 +203,14 @@ public:
|
||||||
return final_result;
|
return final_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitSharedMemoryAccess(const Id result_type, const Id shared_mem, const Id index) {
|
||||||
|
if (std::popcount(static_cast<u32>(info.shared_types)) > 1) {
|
||||||
|
return OpAccessChain(result_type, shared_mem, u32_zero_value, index);
|
||||||
|
}
|
||||||
|
// Workgroup layout struct omitted.
|
||||||
|
return OpAccessChain(result_type, shared_mem, index);
|
||||||
|
}
|
||||||
|
|
||||||
Info& info;
|
Info& info;
|
||||||
const RuntimeInfo& runtime_info;
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
|
|
|
@ -531,8 +531,10 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
|
||||||
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
||||||
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
||||||
// binding index.
|
// binding index.
|
||||||
const IR::Value handle =
|
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||||
ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
|
const IR::Value inline_sampler =
|
||||||
|
ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1),
|
||||||
|
ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3));
|
||||||
|
|
||||||
// Determine how many address registers need to be passed.
|
// Determine how many address registers need to be passed.
|
||||||
// The image type is unknown, so add all 4 possible base registers and resolve later.
|
// The image type is unknown, so add all 4 possible base registers and resolve later.
|
||||||
|
@ -568,7 +570,8 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
|
||||||
const IR::Value address4 = get_addr_reg(12);
|
const IR::Value address4 = get_addr_reg(12);
|
||||||
|
|
||||||
// Issue the placeholder IR instruction.
|
// Issue the placeholder IR instruction.
|
||||||
IR::Value texel = ir.ImageSampleRaw(handle, address1, address2, address3, address4, info);
|
IR::Value texel =
|
||||||
|
ir.ImageSampleRaw(handle, address1, address2, address3, address4, inline_sampler, info);
|
||||||
if (info.is_depth && !gather) {
|
if (info.is_depth && !gather) {
|
||||||
// For non-gather depth sampling, only return a single value.
|
// For non-gather depth sampling, only return a single value.
|
||||||
texel = ir.CompositeExtract(texel, 0);
|
texel = ir.CompositeExtract(texel, 0);
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
|
@ -91,11 +92,15 @@ struct ImageResource {
|
||||||
using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>;
|
using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>;
|
||||||
|
|
||||||
struct SamplerResource {
|
struct SamplerResource {
|
||||||
u32 sharp_idx;
|
std::variant<u32, AmdGpu::Sampler> sampler;
|
||||||
AmdGpu::Sampler inline_sampler{};
|
|
||||||
u32 associated_image : 4;
|
u32 associated_image : 4;
|
||||||
u32 disable_aniso : 1;
|
u32 disable_aniso : 1;
|
||||||
|
|
||||||
|
SamplerResource(u32 sharp_idx, u32 associated_image_, bool disable_aniso_)
|
||||||
|
: sampler{sharp_idx}, associated_image{associated_image_}, disable_aniso{disable_aniso_} {}
|
||||||
|
SamplerResource(AmdGpu::Sampler sampler_)
|
||||||
|
: sampler{sampler_}, associated_image{0}, disable_aniso(0) {}
|
||||||
|
|
||||||
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
||||||
};
|
};
|
||||||
using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>;
|
using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>;
|
||||||
|
@ -318,7 +323,9 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
||||||
return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
|
return std::holds_alternative<AmdGpu::Sampler>(sampler)
|
||||||
|
? std::get<AmdGpu::Sampler>(sampler)
|
||||||
|
: info.ReadUdSharp<AmdGpu::Sampler>(std::get<u32>(sampler));
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
|
constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
|
||||||
|
|
|
@ -1964,9 +1964,9 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
|
||||||
|
|
||||||
Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2,
|
Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2,
|
||||||
const Value& address3, const Value& address4,
|
const Value& address3, const Value& address4,
|
||||||
TextureInstInfo info) {
|
const Value& inline_sampler, TextureInstInfo info) {
|
||||||
return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3,
|
return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, address4,
|
||||||
address4);
|
inline_sampler);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
|
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
|
||||||
|
|
|
@ -349,7 +349,8 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1,
|
[[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1,
|
||||||
const Value& address2, const Value& address3,
|
const Value& address2, const Value& address3,
|
||||||
const Value& address4, TextureInstInfo info);
|
const Value& address4, const Value& inline_sampler,
|
||||||
|
TextureInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
|
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
|
||||||
const F32& bias, const Value& offset,
|
const F32& bias, const Value& offset,
|
||||||
|
|
|
@ -412,7 +412,7 @@ OPCODE(ConvertU8U32, U8, U32,
|
||||||
OPCODE(ConvertU32U8, U32, U8, )
|
OPCODE(ConvertU32U8, U32, U8, )
|
||||||
|
|
||||||
// Image operations
|
// Image operations
|
||||||
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, )
|
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, )
|
||||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
|
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
|
||||||
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
|
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
|
||||||
OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
||||||
|
|
|
@ -168,7 +168,7 @@ public:
|
||||||
|
|
||||||
u32 Add(const SamplerResource& desc) {
|
u32 Add(const SamplerResource& desc) {
|
||||||
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
|
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
|
||||||
return desc.sharp_idx == existing.sharp_idx;
|
return desc.sampler == existing.sampler;
|
||||||
})};
|
})};
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
@ -351,8 +351,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
||||||
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||||
const auto opcode = inst->GetOpcode();
|
const auto opcode = inst->GetOpcode();
|
||||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
if (opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
|
||||||
opcode == IR::Opcode::GetUserData) {
|
opcode == IR::Opcode::GetUserData) {
|
||||||
return inst;
|
return inst;
|
||||||
}
|
}
|
||||||
|
@ -360,9 +359,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
||||||
};
|
};
|
||||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||||
const IR::Inst* producer = result.value();
|
const IR::Inst* tsharp_handle = result.value();
|
||||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
|
||||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
|
||||||
|
|
||||||
// Read image sharp.
|
// Read image sharp.
|
||||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||||
|
@ -427,29 +424,32 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
||||||
|
|
||||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||||
// Read sampler sharp.
|
// Read sampler sharp.
|
||||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
const auto sampler_binding = [&] -> u32 {
|
||||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
const auto sampler = inst.Arg(5).InstRecursive();
|
||||||
const IR::Value& handle = producer->Arg(1);
|
ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4);
|
||||||
|
const auto handle = sampler->Arg(0);
|
||||||
// Inline sampler resource.
|
// Inline sampler resource.
|
||||||
if (handle.IsImmediate()) {
|
if (handle.IsImmediate()) {
|
||||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
LOG_DEBUG(Render_Vulkan, "Inline sampler detected");
|
||||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
const auto [s1, s2, s3, s4] =
|
||||||
const auto binding = descriptors.Add(SamplerResource{
|
std::tuple{sampler->Arg(0), sampler->Arg(1), sampler->Arg(2), sampler->Arg(3)};
|
||||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
ASSERT(s1.IsImmediate() && s2.IsImmediate() && s3.IsImmediate() &&
|
||||||
.inline_sampler = inline_sampler,
|
s4.IsImmediate());
|
||||||
});
|
const auto inline_sampler = AmdGpu::Sampler{
|
||||||
return {binding, inline_sampler};
|
.raw0 = u64(s2.U32()) << 32 | u64(s1.U32()),
|
||||||
}
|
.raw1 = u64(s4.U32()) << 32 | u64(s3.U32()),
|
||||||
|
};
|
||||||
|
const auto binding = descriptors.Add(SamplerResource{inline_sampler});
|
||||||
|
return binding;
|
||||||
|
} else {
|
||||||
// Normal sampler resource.
|
// Normal sampler resource.
|
||||||
const auto ssharp_handle = handle.InstRecursive();
|
const auto ssharp_handle = handle.InstRecursive();
|
||||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||||
const auto binding = descriptors.Add(SamplerResource{
|
const auto binding =
|
||||||
.sharp_idx = ssharp,
|
descriptors.Add(SamplerResource{ssharp, image_binding, disable_aniso});
|
||||||
.associated_image = image_binding,
|
return binding;
|
||||||
.disable_aniso = disable_aniso,
|
}
|
||||||
});
|
|
||||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
|
||||||
}();
|
}();
|
||||||
// Patch image and sampler handle.
|
// Patch image and sampler handle.
|
||||||
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
|
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
|
||||||
|
|
|
@ -11,6 +11,7 @@ set(SHADER_FILES
|
||||||
detilers/micro_32bpp.comp
|
detilers/micro_32bpp.comp
|
||||||
detilers/micro_64bpp.comp
|
detilers/micro_64bpp.comp
|
||||||
detilers/micro_8bpp.comp
|
detilers/micro_8bpp.comp
|
||||||
|
color_to_ms_depth.frag
|
||||||
fault_buffer_process.comp
|
fault_buffer_process.comp
|
||||||
fs_tri.vert
|
fs_tri.vert
|
||||||
fsr.comp
|
fsr.comp
|
||||||
|
|
15
src/video_core/host_shaders/color_to_ms_depth.frag
Normal file
15
src/video_core/host_shaders/color_to_ms_depth.frag
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450 core
|
||||||
|
#extension GL_EXT_samplerless_texture_functions : require
|
||||||
|
|
||||||
|
layout (binding = 0, set = 0) uniform texture2D color;
|
||||||
|
|
||||||
|
layout (location = 0) in vec2 uv;
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
ivec2 coord = ivec2(uv * vec2(textureSize(color, 0).xy));
|
||||||
|
gl_FragDepth = texelFetch(color, coord, 0)[gl_SampleID];
|
||||||
|
}
|
|
@ -355,6 +355,7 @@ bool Instance::CreateDevice() {
|
||||||
.independentBlend = features.independentBlend,
|
.independentBlend = features.independentBlend,
|
||||||
.geometryShader = features.geometryShader,
|
.geometryShader = features.geometryShader,
|
||||||
.tessellationShader = features.tessellationShader,
|
.tessellationShader = features.tessellationShader,
|
||||||
|
.sampleRateShading = features.sampleRateShading,
|
||||||
.dualSrcBlend = features.dualSrcBlend,
|
.dualSrcBlend = features.dualSrcBlend,
|
||||||
.logicOp = features.logicOp,
|
.logicOp = features.logicOp,
|
||||||
.multiDrawIndirect = features.multiDrawIndirect,
|
.multiDrawIndirect = features.multiDrawIndirect,
|
||||||
|
|
|
@ -328,6 +328,7 @@ public:
|
||||||
return render_state;
|
return render_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the current pipeline dynamic state tracking.
|
||||||
DynamicState& GetDynamicState() {
|
DynamicState& GetDynamicState() {
|
||||||
return dynamic_state;
|
return dynamic_state;
|
||||||
}
|
}
|
||||||
|
|
256
src/video_core/texture_cache/blit_helper.cpp
Normal file
256
src/video_core/texture_cache/blit_helper.cpp
Normal file
|
@ -0,0 +1,256 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||||
|
#include "video_core/texture_cache/blit_helper.h"
|
||||||
|
#include "video_core/texture_cache/image.h"
|
||||||
|
|
||||||
|
#include "video_core/host_shaders/color_to_ms_depth_frag.h"
|
||||||
|
#include "video_core/host_shaders/fs_tri_vert.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
static vk::SampleCountFlagBits ToSampleCount(u32 num_samples) {
|
||||||
|
switch (num_samples) {
|
||||||
|
case 1:
|
||||||
|
return vk::SampleCountFlagBits::e1;
|
||||||
|
case 2:
|
||||||
|
return vk::SampleCountFlagBits::e2;
|
||||||
|
case 4:
|
||||||
|
return vk::SampleCountFlagBits::e4;
|
||||||
|
case 8:
|
||||||
|
return vk::SampleCountFlagBits::e8;
|
||||||
|
case 16:
|
||||||
|
return vk::SampleCountFlagBits::e16;
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Unknown samples count = {}", num_samples);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
BlitHelper::BlitHelper(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
||||||
|
: instance{instance_}, scheduler{scheduler_} {
|
||||||
|
CreateShaders();
|
||||||
|
CreatePipelineLayouts();
|
||||||
|
}
|
||||||
|
|
||||||
|
BlitHelper::~BlitHelper() = default;
|
||||||
|
|
||||||
|
// Transfers the channels of a single-sample color image into the samples of a
// multisampled depth target by rendering a full-screen triangle; the fragment
// shader reads `source` and writes depth into `dest` per sample.
void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
    source.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
    dest.Transit(vk::ImageLayout::eDepthAttachmentOptimal,
                 vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {});

    // Restrict each view's usage so view creation is valid even when the
    // underlying image was created with additional usage flags.
    const vk::ImageViewUsageCreateInfo color_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled};
    const vk::ImageViewCreateInfo color_view_ci = {
        .pNext = &color_usage_ci,
        .image = source.image,
        .viewType = vk::ImageViewType::e2D,
        .format = source.info.pixel_format,
        .subresourceRange{
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .baseMipLevel = 0U,
            .levelCount = 1U,
            .baseArrayLayer = 0U,
            .layerCount = 1U,
        },
    };
    const auto [color_view_result, color_view] =
        instance.GetDevice().createImageView(color_view_ci);
    ASSERT_MSG(color_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
               vk::to_string(color_view_result));
    const vk::ImageViewUsageCreateInfo depth_usage_ci{
        .usage = vk::ImageUsageFlagBits::eDepthStencilAttachment};
    const vk::ImageViewCreateInfo depth_view_ci = {
        .pNext = &depth_usage_ci,
        .image = dest.image,
        .viewType = vk::ImageViewType::e2D,
        .format = dest.info.pixel_format,
        .subresourceRange{
            .aspectMask = vk::ImageAspectFlagBits::eDepth,
            .baseMipLevel = 0U,
            .levelCount = 1U,
            .baseArrayLayer = 0U,
            .layerCount = 1U,
        },
    };
    const auto [depth_view_result, depth_view] =
        instance.GetDevice().createImageView(depth_view_ci);
    ASSERT_MSG(depth_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
               vk::to_string(depth_view_result));
    // The views are needed only for this pass; destroy them once the GPU has
    // finished with the recorded commands.
    scheduler.DeferOperation([device = instance.GetDevice(), color_view, depth_view] {
        device.destroyImageView(color_view);
        device.destroyImageView(depth_view);
    });

    Vulkan::RenderState state{};
    state.has_depth = true;
    state.width = dest.info.size.width;
    state.height = dest.info.size.height;
    state.depth_attachment = vk::RenderingAttachmentInfo{
        .imageView = depth_view,
        .imageLayout = vk::ImageLayout::eDepthAttachmentOptimal,
        .loadOp = vk::AttachmentLoadOp::eDontCare,
        .storeOp = vk::AttachmentStoreOp::eStore,
        .clearValue = vk::ClearValue{.depthStencil = {.depth = 0.f}},
    };
    scheduler.BeginRendering(state);

    const auto cmdbuf = scheduler.CommandBuffer();
    const vk::DescriptorImageInfo image_info = {
        .sampler = VK_NULL_HANDLE,
        .imageView = color_view,
        .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
    };
    const vk::WriteDescriptorSet texture_write = {
        .dstSet = VK_NULL_HANDLE, // ignored for push descriptors
        .dstBinding = 0U,
        .dstArrayElement = 0U,
        .descriptorCount = 1U,
        .descriptorType = vk::DescriptorType::eSampledImage,
        .pImageInfo = &image_info,
    };
    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U,
                                texture_write);

    const DepthPipelineKey key{dest.info.num_samples, dest.info.pixel_format};
    const vk::Pipeline depth_pipeline = GetDepthToMsPipeline(key);
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_pipeline);

    const vk::Viewport viewport = {
        .x = 0,
        .y = 0,
        .width = float(state.width),
        .height = float(state.height),
        .minDepth = 0.f,
        .maxDepth = 1.f,
    };
    // The pipeline is created with eViewportWithCount/eScissorWithCount
    // dynamic state (see CreateColorToMSDepthPipeline), so the *WithCount
    // setters must be used; plain setViewport/setScissor are only valid with
    // the eViewport/eScissor dynamic states.
    cmdbuf.setViewportWithCount(viewport);

    const vk::Rect2D scissor = {
        .offset = {0, 0},
        .extent = {state.width, state.height},
    };
    cmdbuf.setScissorWithCount(scissor);

    // Full-screen pass: 3 vertices synthesized by the fs_tri vertex shader.
    cmdbuf.draw(3, 1, 0, 0);

    // Dynamic state was changed behind the tracker's back; force re-emission
    // on the next regular draw.
    scheduler.GetDynamicState().Invalidate();
}
|
||||||
|
|
||||||
|
// Returns the color->MS-depth pipeline matching `key`, building and caching
// it on first use.
vk::Pipeline BlitHelper::GetDepthToMsPipeline(const DepthPipelineKey& key) {
    for (const auto& [cached_key, cached_pipeline] : color_to_ms_depth_pl) {
        if (cached_key == key) {
            return *cached_pipeline;
        }
    }
    // Cache miss: the freshly created pipeline is appended at the back.
    CreateColorToMSDepthPipeline(key);
    return *color_to_ms_depth_pl.back().second;
}
|
||||||
|
|
||||||
|
// Compiles the SPIR-V modules used by the blit passes: a full-screen-triangle
// vertex shader and the color->MS-depth fragment shader.
void BlitHelper::CreateShaders() {
    const auto device = instance.GetDevice();
    fs_tri_vertex =
        Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex, device);
    color_to_ms_depth_frag = Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG,
                                             vk::ShaderStageFlagBits::eFragment, device);
}
|
||||||
|
|
||||||
|
void BlitHelper::CreatePipelineLayouts() {
|
||||||
|
const vk::DescriptorSetLayoutBinding texture_binding = {
|
||||||
|
.binding = 0,
|
||||||
|
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = vk::ShaderStageFlagBits::eFragment,
|
||||||
|
};
|
||||||
|
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
|
||||||
|
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
|
||||||
|
.bindingCount = 1U,
|
||||||
|
.pBindings = &texture_binding,
|
||||||
|
};
|
||||||
|
auto [desc_layout_result, desc_layout] =
|
||||||
|
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||||
|
single_texture_descriptor_set_layout = std::move(desc_layout);
|
||||||
|
const vk::DescriptorSetLayout set_layout = *single_texture_descriptor_set_layout;
|
||||||
|
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||||
|
.setLayoutCount = 1U,
|
||||||
|
.pSetLayouts = &set_layout,
|
||||||
|
.pushConstantRangeCount = 0U,
|
||||||
|
.pPushConstantRanges = nullptr,
|
||||||
|
};
|
||||||
|
auto [layout_result, pipeline_layout] =
|
||||||
|
instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||||
|
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||||
|
"Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
|
||||||
|
Vulkan::SetObjectName(instance.GetDevice(), *pipeline_layout, "Single texture pipeline layout");
|
||||||
|
single_texture_pl_layout = std::move(pipeline_layout);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds and caches one color->MS-depth graphics pipeline for the given
// (sample count, depth format) key, using dynamic rendering (no render pass)
// with a depth-only attachment.
void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
    const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
        .topology = vk::PrimitiveTopology::eTriangleList,
    };
    // Rasterize at the destination's sample count so every depth sample is
    // shaded.
    const vk::PipelineMultisampleStateCreateInfo multisampling = {
        .rasterizationSamples = ToSampleCount(key.num_samples),
    };
    // eAlways: the pass unconditionally overwrites destination depth.
    const vk::PipelineDepthStencilStateCreateInfo depth_state = {
        .depthTestEnable = true,
        .depthWriteEnable = true,
        .depthCompareOp = vk::CompareOp::eAlways,
    };
    // NOTE(review): WithCount dynamic state means recording code must use the
    // setViewportWithCount/setScissorWithCount setters — confirm call sites.
    const std::array dynamic_states = {vk::DynamicState::eViewportWithCount,
                                       vk::DynamicState::eScissorWithCount};
    const vk::PipelineDynamicStateCreateInfo dynamic_info = {
        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
        .pDynamicStates = dynamic_states.data(),
    };

    std::array<vk::PipelineShaderStageCreateInfo, 2> shader_stages;
    shader_stages[0] = {
        .stage = vk::ShaderStageFlagBits::eVertex,
        .module = fs_tri_vertex,
        .pName = "main",
    };
    shader_stages[1] = {
        .stage = vk::ShaderStageFlagBits::eFragment,
        .module = color_to_ms_depth_frag,
        .pName = "main",
    };

    // Dynamic rendering: depth-only, no color or stencil attachments.
    const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
        .colorAttachmentCount = 0U,
        .pColorAttachmentFormats = nullptr,
        .depthAttachmentFormat = key.depth_format,
        .stencilAttachmentFormat = vk::Format::eUndefined,
    };

    // Zero-initialized states: no vertex input (vertices are synthesized in
    // the shader), no blending (no color attachments), viewport/scissor are
    // dynamic.
    const vk::PipelineColorBlendStateCreateInfo color_blending{};
    const vk::PipelineViewportStateCreateInfo viewport_info{};
    const vk::PipelineVertexInputStateCreateInfo vertex_input_info{};
    const vk::PipelineRasterizationStateCreateInfo raster_state{.lineWidth = 1.f};

    const vk::GraphicsPipelineCreateInfo pipeline_info = {
        .pNext = &pipeline_rendering_ci,
        .stageCount = static_cast<u32>(shader_stages.size()),
        .pStages = shader_stages.data(),
        .pVertexInputState = &vertex_input_info,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_info,
        .pRasterizationState = &raster_state,
        .pMultisampleState = &multisampling,
        .pDepthStencilState = &depth_state,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_info,
        .layout = *single_texture_pl_layout,
    };

    auto [pipeline_result, pipeline] =
        instance.GetDevice().createGraphicsPipelineUnique(VK_NULL_HANDLE, pipeline_info);
    ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
               vk::to_string(pipeline_result));
    Vulkan::SetObjectName(instance.GetDevice(), *pipeline, "Color to MS Depth {}", key.num_samples);

    // NOTE(review): the cache grows unbounded here; MaxMsPipelines (header)
    // is never enforced — confirm whether a cap was intended.
    color_to_ms_depth_pl.emplace_back(key, std::move(pipeline));
}
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
55
src/video_core/texture_cache/blit_helper.h
Normal file
55
src/video_core/texture_cache/blit_helper.h
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <tsl/robin_map.h>
|
||||||
|
|
||||||
|
#include "common/types.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
class Instance;
|
||||||
|
class Scheduler;
|
||||||
|
} // namespace Vulkan
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
class Image;
|
||||||
|
class ImageView;
|
||||||
|
|
||||||
|
// Performs format/layout conversions that require a rendering pass rather
// than a plain transfer, e.g. expanding a single-sample color image into a
// multisampled depth image.
class BlitHelper {
    // NOTE(review): declared but not referenced in the visible code — confirm
    // whether the pipeline cache is meant to be capped at this size.
    static constexpr size_t MaxMsPipelines = 6;

public:
    explicit BlitHelper(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
    ~BlitHelper();

    // Renders the channels of `source` into the samples of the multisampled
    // depth image `dest`.
    void BlitColorToMsDepth(Image& source, Image& dest);

private:
    void CreateShaders();
    void CreatePipelineLayouts();

    // Identifies one cached pipeline: destination sample count + depth format.
    struct DepthPipelineKey {
        u32 num_samples;
        vk::Format depth_format;

        auto operator<=>(const DepthPipelineKey&) const noexcept = default;
    };
    vk::Pipeline GetDepthToMsPipeline(const DepthPipelineKey& key);
    void CreateColorToMSDepthPipeline(const DepthPipelineKey& key);

private:
    const Vulkan::Instance& instance;
    Vulkan::Scheduler& scheduler;
    vk::UniqueDescriptorSetLayout single_texture_descriptor_set_layout;
    // Push-descriptor layout: one sampled image at binding 0, fragment stage.
    vk::UniquePipelineLayout single_texture_pl_layout;
    // NOTE(review): raw (non-unique) handles; no destroy is visible in this
    // file's code — confirm ownership/cleanup of these shader modules.
    vk::ShaderModule fs_tri_vertex;
    vk::ShaderModule color_to_ms_depth_frag;

    // Pipelines keyed by (num_samples, depth_format); looked up linearly.
    using DepthPipeline = std::pair<DepthPipelineKey, vk::UniquePipeline>;
    std::vector<DepthPipeline> color_to_ms_depth_pl{};
};
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -47,6 +47,7 @@ struct ImageInfo {
|
||||||
VAddr cmask_addr;
|
VAddr cmask_addr;
|
||||||
VAddr fmask_addr;
|
VAddr fmask_addr;
|
||||||
VAddr htile_addr;
|
VAddr htile_addr;
|
||||||
|
u32 htile_clear_mask{u32(-1)};
|
||||||
} meta_info{};
|
} meta_info{};
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -22,7 +22,7 @@ static constexpr u64 NumFramesBeforeRemoval = 32;
|
||||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
BufferCache& buffer_cache_, PageManager& tracker_)
|
BufferCache& buffer_cache_, PageManager& tracker_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
|
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
|
||||||
tile_manager{instance, scheduler} {
|
blit_helper{instance, scheduler}, tile_manager{instance, scheduler} {
|
||||||
// Create basic null image at fixed image ID.
|
// Create basic null image at fixed image ID.
|
||||||
const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm);
|
const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm);
|
||||||
ASSERT(null_id.index == NULL_IMAGE_ID.index);
|
ASSERT(null_id.index == NULL_IMAGE_ID.index);
|
||||||
|
@ -177,10 +177,20 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
|
||||||
auto& new_image = slot_images[new_image_id];
|
auto& new_image = slot_images[new_image_id];
|
||||||
new_image.usage = cache_image.usage;
|
new_image.usage = cache_image.usage;
|
||||||
new_image.flags &= ~ImageFlagBits::Dirty;
|
new_image.flags &= ~ImageFlagBits::Dirty;
|
||||||
|
// When creating a depth buffer through overlap resolution don't clear it on first use.
|
||||||
|
new_image.info.meta_info.htile_clear_mask = 0;
|
||||||
|
|
||||||
|
if (cache_image.info.num_samples == 1 && new_info.num_samples == 1) {
|
||||||
// Perform depth<->color copy using the intermediate copy buffer.
|
// Perform depth<->color copy using the intermediate copy buffer.
|
||||||
const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
|
const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
|
||||||
new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
|
new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
|
||||||
|
} else if (cache_image.info.num_samples == 1 && new_info.IsDepthStencil() &&
|
||||||
|
new_info.num_samples > 1) {
|
||||||
|
// Perform a rendering pass to transfer the channels of source as samples in dest.
|
||||||
|
blit_helper.BlitColorToMsDepth(cache_image, new_image);
|
||||||
|
} else {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Unimplemented depth overlap copy");
|
||||||
|
}
|
||||||
|
|
||||||
// Free the cache image.
|
// Free the cache image.
|
||||||
FreeImage(cache_image_id);
|
FreeImage(cache_image_id);
|
||||||
|
@ -202,7 +212,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||||
|
|
||||||
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
|
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
|
||||||
if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
|
if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
|
||||||
image_info.num_bits != tex_cache_image.info.num_bits) {
|
image_info.num_bits * image_info.num_samples !=
|
||||||
|
tex_cache_image.info.num_bits * tex_cache_image.info.num_samples) {
|
||||||
// Very likely this kind of overlap is caused by allocation from a pool.
|
// Very likely this kind of overlap is caused by allocation from a pool.
|
||||||
if (safe_to_delete) {
|
if (safe_to_delete) {
|
||||||
FreeImage(cache_image_id);
|
FreeImage(cache_image_id);
|
||||||
|
@ -470,8 +481,10 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
||||||
// Register meta data for this depth buffer
|
// Register meta data for this depth buffer
|
||||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
||||||
if (desc.info.meta_info.htile_addr) {
|
if (desc.info.meta_info.htile_addr) {
|
||||||
surface_metas.emplace(desc.info.meta_info.htile_addr,
|
surface_metas.emplace(
|
||||||
MetaDataInfo{.type = MetaDataInfo::Type::HTile});
|
desc.info.meta_info.htile_addr,
|
||||||
|
MetaDataInfo{.type = MetaDataInfo::Type::HTile,
|
||||||
|
.clear_mask = image.info.meta_info.htile_clear_mask});
|
||||||
image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
|
image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
|
||||||
image.flags |= ImageFlagBits::MetaRegistered;
|
image.flags |= ImageFlagBits::MetaRegistered;
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/slot_vector.h"
|
#include "common/slot_vector.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/multi_level_page_table.h"
|
#include "video_core/multi_level_page_table.h"
|
||||||
|
#include "video_core/texture_cache/blit_helper.h"
|
||||||
#include "video_core/texture_cache/image.h"
|
#include "video_core/texture_cache/image.h"
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
#include "video_core/texture_cache/sampler.h"
|
#include "video_core/texture_cache/sampler.h"
|
||||||
|
@ -286,6 +287,7 @@ private:
|
||||||
Vulkan::Scheduler& scheduler;
|
Vulkan::Scheduler& scheduler;
|
||||||
BufferCache& buffer_cache;
|
BufferCache& buffer_cache;
|
||||||
PageManager& tracker;
|
PageManager& tracker;
|
||||||
|
BlitHelper blit_helper;
|
||||||
TileManager tile_manager;
|
TileManager tile_manager;
|
||||||
Common::SlotVector<Image> slot_images;
|
Common::SlotVector<Image> slot_images;
|
||||||
Common::SlotVector<ImageView> slot_image_views;
|
Common::SlotVector<ImageView> slot_image_views;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue