mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-01 08:13:16 +00:00
video_core: Preliminary storage image support and more (#188)
* vk_rasterizer: Clear depth buffer when DB_RENDER_CONTROL says so * video_core: Preliminary storage image support, more opcodes * renderer_vulkan: a fix for vertex buffers merging * renderer_vulkan: a heuristic for blend override when alpha out is masked --------- Co-authored-by: psucien <bad_cast@protonmail.com>
This commit is contained in:
parent
23f11a3fda
commit
7b1a317b09
30 changed files with 429 additions and 101 deletions
|
@ -420,6 +420,13 @@ struct Liverpool {
|
|||
};
|
||||
|
||||
union ColorBufferMask {
|
||||
// Single-bit flags naming the four positions of a 4-bit per-target component mask.
// Each enumerator occupies one bit, so values can be AND-ed against a 4-bit mask
// to check whether an individual channel is present.
enum ColorComponent : u32 {
|
||||
// Bit 0.
ComponentR = (1u << 0),
|
||||
// Bit 1.
ComponentG = (1u << 1),
|
||||
// Bit 2.
ComponentB = (1u << 2),
|
||||
// Bit 3.
ComponentA = (1u << 3),
|
||||
};
|
||||
|
||||
u32 raw;
|
||||
BitField<0, 4, u32> output0_mask;
|
||||
BitField<4, 4, u32> output1_mask;
|
||||
|
@ -430,8 +437,8 @@ struct Liverpool {
|
|||
BitField<24, 4, u32> output6_mask;
|
||||
BitField<28, 4, u32> output7_mask;
|
||||
|
||||
[[nodiscard]] u8 GetMask(int buf_id) const {
|
||||
return (raw >> (buf_id * 4)) & 0xffu;
|
||||
u32 GetMask(int buf_id) const {
|
||||
return (raw >> (buf_id * 4)) & 0xfu;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -732,6 +739,20 @@ struct Liverpool {
|
|||
float back_offset;
|
||||
};
|
||||
|
||||
// Holds a base address stored in a single 32-bit word in pre-shifted form.
struct Address {
|
||||
// Stored value; the full address is this value shifted left by 8 bits.
u32 address;
|
||||
|
||||
// Expands the stored 32-bit value into a full address.
// The value is widened to 64 bits before shifting so the upper bits are not lost.
VAddr GetAddress() const {
|
||||
return u64(address) << 8;
|
||||
}
|
||||
};
|
||||
|
||||
// Bitfield view over one 32-bit depth render control word.
// All members alias the same storage as `raw`.
union DepthRenderControl {
|
||||
// Whole word as written.
u32 raw;
|
||||
// Bit 0: depth clear enable flag.
BitField<0, 1, u32> depth_clear_enable;
|
||||
// Bit 1: stencil clear enable flag.
BitField<1, 1, u32> stencil_clear_enable;
|
||||
};
|
||||
|
||||
union Regs {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS(0x2C08);
|
||||
|
@ -740,11 +761,15 @@ struct Liverpool {
|
|||
ShaderProgram vs_program;
|
||||
INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16);
|
||||
ComputeProgram cs_program;
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80);
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
||||
DepthRenderControl depth_render_control;
|
||||
INSERT_PADDING_WORDS(4);
|
||||
Address depth_htile_data_base;
|
||||
INSERT_PADDING_WORDS(2);
|
||||
float depth_bounds_min;
|
||||
float depth_bounds_max;
|
||||
u32 stencil_clear;
|
||||
u32 depth_clear;
|
||||
float depth_clear;
|
||||
Scissor screen_scissor;
|
||||
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
|
||||
DepthBuffer depth_buffer;
|
||||
|
@ -925,6 +950,8 @@ static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
|
|||
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
|
||||
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
|
||||
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
|
||||
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
|
||||
|
@ -942,6 +969,7 @@ static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
|
|||
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
|
||||
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
|
||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||
|
|
|
@ -334,6 +334,19 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR32Sfloat;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR16G16B16A16Sfloat;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Uint) {
|
||||
return vk::Format::eR32Uint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR32Sint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eR8G8Unorm;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||
}
|
||||
|
||||
|
|
|
@ -111,14 +111,15 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
|||
|
||||
for (const auto& image : info.images) {
|
||||
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
||||
const auto& image_view = texture_cache.FindImageView(tsharp);
|
||||
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
|
|
|
@ -196,7 +196,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
|
||||
const auto color_blend = LiverpoolToVK::BlendOp(control.color_func);
|
||||
attachments[i] = vk::PipelineColorBlendAttachmentState{
|
||||
.blendEnable = key.blend_controls[i].enable,
|
||||
.blendEnable = control.enable,
|
||||
.srcColorBlendFactor = src_color,
|
||||
.dstColorBlendFactor = dst_color,
|
||||
.colorBlendOp = color_blend,
|
||||
|
@ -215,6 +215,29 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
|
||||
: key.write_masks[i],
|
||||
};
|
||||
|
||||
// On GCN GPUs there is an additional mask which controls the color components exported
|
||||
// from a pixel shader. It is possible for a game to mask out the alpha channel,
|
||||
// while it still needs to be used in blending ops. In such cases, HW will default alpha
|
||||
// to 1 and perform the blending, while shader normally outputs 0 in the last component.
|
||||
// Unfortunately, Vulkan doesn't provide any control over blend inputs, so below we detect
|
||||
// such cases and override alpha value in order to emulate HW behaviour.
|
||||
const auto has_alpha_masked_out =
|
||||
(key.cb_shader_mask.GetMask(i) & Liverpool::ColorBufferMask::ComponentA) == 0;
|
||||
const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha ||
|
||||
src_color == vk::BlendFactor::eOneMinusSrcAlpha;
|
||||
const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha ||
|
||||
dst_color == vk::BlendFactor::eOneMinusSrcAlpha;
|
||||
if (has_alpha_masked_out && has_src_alpha_in_src_blend) {
|
||||
attachments[i].srcColorBlendFactor = src_color == vk::BlendFactor::eSrcAlpha
|
||||
? vk::BlendFactor::eOne
|
||||
: vk::BlendFactor::eZero; // 1-A
|
||||
}
|
||||
if (has_alpha_masked_out && has_src_alpha_in_dst_blend) {
|
||||
attachments[i].dstColorBlendFactor = dst_color == vk::BlendFactor::eSrcAlpha
|
||||
? vk::BlendFactor::eOne
|
||||
: vk::BlendFactor::eZero; // 1-A
|
||||
}
|
||||
}
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
|
@ -318,7 +341,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
|
||||
for (const auto& image : stage.images) {
|
||||
const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
||||
const auto& image_view = texture_cache.FindImageView(tsharp);
|
||||
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
|
||||
vk::ImageLayout::eShaderReadOnlyOptimal);
|
||||
set_writes.push_back({
|
||||
|
@ -326,7 +349,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
|
@ -387,11 +411,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
|||
|
||||
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]};
|
||||
for (auto range : ranges) {
|
||||
auto& prev_range = ranges.back();
|
||||
auto& prev_range = ranges_merged.back();
|
||||
if (prev_range.end_address < range.base_address) {
|
||||
ranges_merged.emplace_back(range);
|
||||
} else {
|
||||
ranges_merged.back().end_address = std::max(prev_range.end_address, range.end_address);
|
||||
prev_range.end_address = std::max(prev_range.end_address, range.end_address);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ struct GraphicsPipelineKey {
|
|||
Liverpool::CullMode cull_mode;
|
||||
Liverpool::FrontFace front_face;
|
||||
Liverpool::ClipSpace clip_space;
|
||||
Liverpool::ColorBufferMask cb_shader_mask{};
|
||||
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
||||
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
||||
|
||||
|
|
|
@ -132,6 +132,7 @@ void PipelineCache::RefreshGraphicsKey() {
|
|||
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
||||
!col_buf.info.blend_bypass);
|
||||
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
||||
key.cb_shader_mask = regs.color_shader_mask;
|
||||
|
||||
++remapped_cb;
|
||||
}
|
||||
|
|
|
@ -60,13 +60,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
});
|
||||
}
|
||||
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
|
||||
const bool is_clear = regs.depth_render_control.depth_clear_enable;
|
||||
const auto& image_view =
|
||||
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
|
||||
depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eGeneral,
|
||||
.loadOp = vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
|
||||
.stencil = regs.stencil_clear}},
|
||||
};
|
||||
num_depth_attachments++;
|
||||
}
|
||||
|
|
|
@ -160,10 +160,10 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
|
|||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
|
||||
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
|
||||
Image& image = FindImage(ImageInfo{desc}, desc.Address());
|
||||
|
||||
if (image.info.is_storage) {
|
||||
if (is_storage) {
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||
} else {
|
||||
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead);
|
||||
|
@ -194,6 +194,10 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe
|
|||
auto& image = FindImage(info, buffer.Address(), false);
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
image.Transit(vk::ImageLayout::eDepthStencilAttachmentOptimal,
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentRead);
|
||||
|
||||
ImageViewInfo view_info;
|
||||
view_info.format = info.pixel_format;
|
||||
return RegisterImageView(image, view_info);
|
||||
|
|
|
@ -41,7 +41,7 @@ public:
|
|||
bool refresh_on_create = true);
|
||||
|
||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);
|
||||
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
|
||||
|
||||
/// Retrieves the render target with specified properties
|
||||
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue