video_core: Preliminary storage image support and more (#188)

* vk_rasterizer: Clear depth buffer when DB_RENDER_CONTROL says so

* video_core: Preliminary storage image support, more opcodes

* renderer_vulkan: a fix for vertex buffer range merging

* renderer_vulkan: a heuristic for blend override when the alpha output is masked

---------

Co-authored-by: psucien <bad_cast@protonmail.com>
Author: TheTurtle, 2024-06-10 22:35:14 +03:00, committed by GitHub
parent 23f11a3fda
commit 7b1a317b09
30 changed files with 429 additions and 101 deletions

View file

@@ -420,6 +420,13 @@ struct Liverpool {
};
union ColorBufferMask {
enum ColorComponent : u32 {
ComponentR = (1u << 0),
ComponentG = (1u << 1),
ComponentB = (1u << 2),
ComponentA = (1u << 3),
};
u32 raw;
BitField<0, 4, u32> output0_mask;
BitField<4, 4, u32> output1_mask;
@@ -430,8 +437,8 @@ struct Liverpool {
BitField<24, 4, u32> output6_mask;
BitField<28, 4, u32> output7_mask;
[[nodiscard]] u8 GetMask(int buf_id) const {
return (raw >> (buf_id * 4)) & 0xffu;
u32 GetMask(int buf_id) const {
return (raw >> (buf_id * 4)) & 0xfu;
}
};
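The GetMask() change above masks with 0xf instead of 0xff: each color buffer owns only four component bits (RGBA) in the packed word, so the wider mask also picked up the next buffer's bits. A minimal standalone sketch of the corrected extraction (function name is hypothetical, for illustration only):

```cpp
#include <cstdint>

// Eight color buffers share one 32-bit word; four mask bits (RGBA) per buffer.
constexpr std::uint32_t PerBufferMask(std::uint32_t raw, int buf_id) {
    return (raw >> (buf_id * 4)) & 0xfu; // 4 bits per buffer, not 8
}

static_assert(PerBufferMask(0x000000F0u, 1) == 0xFu, "buffer 1 fully enabled");
static_assert(PerBufferMask(0x000000F0u, 0) == 0x0u, "buffer 0 fully masked");
```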
@@ -732,6 +739,20 @@ struct Liverpool {
float back_offset;
};
struct Address {
u32 address;
VAddr GetAddress() const {
return u64(address) << 8;
}
};
union DepthRenderControl {
u32 raw;
BitField<0, 1, u32> depth_clear_enable;
BitField<1, 1, u32> stencil_clear_enable;
};
union Regs {
struct {
INSERT_PADDING_WORDS(0x2C08);
@@ -740,11 +761,15 @@ struct Liverpool {
ShaderProgram vs_program;
INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16);
ComputeProgram cs_program;
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80);
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(4);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min;
float depth_bounds_max;
u32 stencil_clear;
u32 depth_clear;
float depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
DepthBuffer depth_buffer;
@@ -925,6 +950,8 @@ static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
@@ -942,6 +969,7 @@ static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
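For reference, the newly mapped depth registers decode as below; a hedged sketch using plain bit operations instead of the BitField helper (struct and function names are ours, not from the patch):

```cpp
#include <cstdint>

// DB_RENDER_CONTROL clear bits, written out without the BitField helper (illustration only).
struct DbRenderControlBits {
    std::uint32_t raw;
    bool DepthClearEnable() const { return (raw >> 0) & 1u; }
    bool StencilClearEnable() const { return (raw >> 1) & 1u; }
};

// Base addresses such as depth_htile_data_base are stored 256-byte aligned,
// which is why Address::GetAddress() shifts the packed value left by 8.
inline std::uint64_t ExpandBaseAddress(std::uint32_t packed) {
    return std::uint64_t(packed) << 8;
}
```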

View file

@@ -334,6 +334,19 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16B16A16Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32Uint;
}
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR32Sint;
}
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8Unorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View file

@@ -111,14 +111,15 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
for (const auto& image : info.images) {
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
const auto& image_view = texture_cache.FindImageView(tsharp);
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
});
}

View file

@@ -196,7 +196,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
const auto color_blend = LiverpoolToVK::BlendOp(control.color_func);
attachments[i] = vk::PipelineColorBlendAttachmentState{
.blendEnable = key.blend_controls[i].enable,
.blendEnable = control.enable,
.srcColorBlendFactor = src_color,
.dstColorBlendFactor = dst_color,
.colorBlendOp = color_blend,
@@ -215,6 +215,29 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
: key.write_masks[i],
};
// On GCN GPUs there is an additional mask that controls which color components the pixel
// shader exports. A game may mask out the alpha channel even though alpha is still used in
// the blending ops. In that case the hardware defaults alpha to 1 and performs the blend,
// while the shader typically outputs 0 in the last component. Unfortunately, Vulkan gives
// no control over blend inputs, so below we detect such cases and override the blend
// factors to emulate the hardware behaviour.
const auto has_alpha_masked_out =
(key.cb_shader_mask.GetMask(i) & Liverpool::ColorBufferMask::ComponentA) == 0;
const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha ||
src_color == vk::BlendFactor::eOneMinusSrcAlpha;
const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha ||
dst_color == vk::BlendFactor::eOneMinusSrcAlpha;
if (has_alpha_masked_out && has_src_alpha_in_src_blend) {
attachments[i].srcColorBlendFactor = src_color == vk::BlendFactor::eSrcAlpha
? vk::BlendFactor::eOne
: vk::BlendFactor::eZero; // 1-A
}
if (has_alpha_masked_out && has_src_alpha_in_dst_blend) {
attachments[i].dstColorBlendFactor = dst_color == vk::BlendFactor::eSrcAlpha
? vk::BlendFactor::eOne
: vk::BlendFactor::eZero; // 1-A
}
}
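The override boils down to substituting the constant the hardware would use: with alpha masked out, GCN blends as if A == 1, so eSrcAlpha collapses to eOne and eOneMinusSrcAlpha to eZero. A hedged sketch of that substitution as a standalone helper (the helper is ours, not part of the patch):

```cpp
#include <vulkan/vulkan.hpp>

// When the shader masks out alpha, fold the implicit A == 1 into the blend factor,
// since Vulkan offers no way to override the blend input itself.
inline vk::BlendFactor FoldMaskedAlphaFactor(vk::BlendFactor factor) {
    switch (factor) {
    case vk::BlendFactor::eSrcAlpha:
        return vk::BlendFactor::eOne;  // A == 1
    case vk::BlendFactor::eOneMinusSrcAlpha:
        return vk::BlendFactor::eZero; // 1 - A == 0
    default:
        return factor;
    }
}
```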
const vk::PipelineColorBlendStateCreateInfo color_blending = {
@@ -318,7 +341,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& image : stage.images) {
const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
const auto& image_view = texture_cache.FindImageView(tsharp);
const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
vk::ImageLayout::eShaderReadOnlyOptimal);
set_writes.push_back({
@@ -326,7 +349,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
});
}
@@ -387,11 +411,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]};
for (auto range : ranges) {
auto& prev_range = ranges.back();
auto& prev_range = ranges_merged.back();
if (prev_range.end_address < range.base_address) {
ranges_merged.emplace_back(range);
} else {
ranges_merged.back().end_address = std::max(prev_range.end_address, range.end_address);
prev_range.end_address = std::max(prev_range.end_address, range.end_address);
}
}
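The fix above makes prev_range refer to the merged output (ranges_merged.back()) rather than the input list, so overlapping vertex-buffer ranges actually coalesce. A self-contained sketch of the corrected merge, assuming a non-empty input sorted by base address (the Range type here is simplified, not the emulator's BufferRange):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct Range {
    std::uint64_t base_address;
    std::uint64_t end_address;
};

// Coalesce sorted, possibly overlapping ranges; always extend the *output* back().
std::vector<Range> MergeRanges(const std::vector<Range>& ranges) {
    std::vector<Range> merged{ranges.front()};
    for (const Range& range : ranges) {
        Range& prev = merged.back(); // the bug was taking ranges.back() here
        if (prev.end_address < range.base_address) {
            merged.push_back(range);
        } else {
            prev.end_address = std::max(prev.end_address, range.end_address);
        }
    }
    return merged;
}
```

For example, the input {0,16}, {8,32}, {40,48} merges to {0,32}, {40,48}.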

View file

@@ -46,6 +46,7 @@ struct GraphicsPipelineKey {
Liverpool::CullMode cull_mode;
Liverpool::FrontFace front_face;
Liverpool::ClipSpace clip_space;
Liverpool::ColorBufferMask cb_shader_mask{};
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;

View file

@@ -132,6 +132,7 @@ void PipelineCache::RefreshGraphicsKey() {
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
key.cb_shader_mask = regs.color_shader_mask;
++remapped_cb;
}

View file

@@ -60,13 +60,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
});
}
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
const bool is_clear = regs.depth_render_control.depth_clear_enable;
const auto& image_view =
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
.loadOp = vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
.stencil = regs.stencil_clear}},
};
num_depth_attachments++;
}
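For clarity, the load/store selection driven by depth_render_control can be read as a tiny helper; a hedged sketch (helper name is ours), assuming the eNone store op is available in the Vulkan headers in use:

```cpp
#include <utility>
#include <vulkan/vulkan.hpp>

// Depth attachment ops when DB_RENDER_CONTROL requests a clear: clear on load,
// and mark the store as eNone, matching the patch above.
inline std::pair<vk::AttachmentLoadOp, vk::AttachmentStoreOp> DepthAttachmentOps(bool is_clear) {
    if (is_clear) {
        return {vk::AttachmentLoadOp::eClear, vk::AttachmentStoreOp::eNone};
    }
    return {vk::AttachmentLoadOp::eLoad, vk::AttachmentStoreOp::eStore};
}
```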

View file

@@ -160,10 +160,10 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
return slot_image_views[view_id];
}
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
Image& image = FindImage(ImageInfo{desc}, desc.Address());
if (image.info.is_storage) {
if (is_storage) {
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
} else {
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead);
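FindImageView() now lets the shader descriptor decide the transition: storage images go to eGeneral with shader-write access, sampled images to eShaderReadOnlyOptimal with shader-read. A minimal sketch of just that selection as a free function (not the cache's API):

```cpp
#include <utility>
#include <vulkan/vulkan.hpp>

// Target layout/access for a shader image binding, keyed on the descriptor's storage flag.
inline std::pair<vk::ImageLayout, vk::AccessFlags> ShaderImageState(bool is_storage) {
    if (is_storage) {
        return {vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite};
    }
    return {vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead};
}
```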
@@ -194,6 +194,10 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe
auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified;
image.Transit(vk::ImageLayout::eDepthStencilAttachmentOptimal,
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead);
ImageViewInfo view_info;
view_info.format = info.pixel_format;
return RegisterImageView(image, view_info);

View file

@@ -41,7 +41,7 @@ public:
bool refresh_on_create = true);
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
/// Retrieves the render target with specified properties
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,