Handle mixed-sample attachments during draws

This commit is contained in:
Marcin Mikołajczyk 2025-06-13 23:28:05 +01:00
parent 4bfa8c9fc7
commit 8095cd493c
9 changed files with 107 additions and 18 deletions

View file

@ -108,6 +108,7 @@ GraphicsPipeline::GraphicsPipeline(
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
// if dynamic rasterization samples state is enabled, this field is ignored
.rasterizationSamples =
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
.sampleShadingEnable = false,
@ -121,7 +122,7 @@ GraphicsPipeline::GraphicsPipeline(
.pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr,
};
boost::container::static_vector<vk::DynamicState, 20> dynamic_states = {
boost::container::static_vector<vk::DynamicState, 22> dynamic_states = {
vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount,
vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable,
vk::DynamicState::eDepthWriteEnable, vk::DynamicState::eDepthCompareOp,
@ -147,6 +148,9 @@ GraphicsPipeline::GraphicsPipeline(
} else if (!vertex_bindings.empty()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStride);
}
if (instance.IsDynamicRasterizationSamplesSupported()) {
dynamic_states.push_back(vk::DynamicState::eRasterizationSamplesEXT);
}
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),

View file

@ -39,7 +39,7 @@ struct GraphicsPipelineKey {
vk::Format depth_format;
vk::Format stencil_format;
u32 num_samples;
u32 num_samples{};
u32 mrt_mask;
AmdGpu::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode;

View file

@ -213,7 +213,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
vk::PhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT>();
features = feature_chain.get().features;
const vk::StructureChain properties_chain = physical_device.getProperties2<
@ -257,6 +258,8 @@ bool Instance::CreateDevice() {
feature_chain.get<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
LOG_INFO(Render_Vulkan, "- extendedDynamicState3ColorWriteMask: {}",
dynamic_state_3_features.extendedDynamicState3ColorWriteMask);
LOG_INFO(Render_Vulkan, "- extendedDynamicState3RasterizationSamples: {}",
dynamic_state_3_features.extendedDynamicState3RasterizationSamples);
}
robustness2 = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
if (robustness2) {
@ -300,6 +303,8 @@ bool Instance::CreateDevice() {
Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
}
dynamic_rendering_unused_attachments =
add_extension(VK_EXT_DYNAMIC_RENDERING_UNUSED_ATTACHMENTS_EXTENSION_NAME);
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@ -409,6 +414,8 @@ bool Instance::CreateDevice() {
.customBorderColorWithoutFormat = true,
},
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{
.extendedDynamicState3RasterizationSamples =
dynamic_state_3_features.extendedDynamicState3RasterizationSamples,
.extendedDynamicState3ColorWriteMask =
dynamic_state_3_features.extendedDynamicState3ColorWriteMask,
},
@ -449,6 +456,9 @@ bool Instance::CreateDevice() {
.workgroupMemoryExplicitLayout16BitAccess =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
},
vk::PhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT{
.dynamicRenderingUnusedAttachments = true,
},
#ifdef __APPLE__
vk::PhysicalDevicePortabilitySubsetFeaturesKHR{
.constantAlphaColorBlendFactors = portability_features.constantAlphaColorBlendFactors,
@ -502,6 +512,9 @@ bool Instance::CreateDevice() {
if (!workgroup_memory_explicit_layout) {
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
}
if (!dynamic_rendering_unused_attachments) {
device_chain.unlink<vk::PhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View file

@ -114,6 +114,13 @@ public:
return depth_range_unrestricted;
}
/// Reports whether the rasterization sample count can be set as dynamic state:
/// requires dynamic_state_3 to be set and the
/// extendedDynamicState3RasterizationSamples feature flag to be non-zero.
bool IsDynamicRasterizationSamplesSupported() const {
    if (!dynamic_state_3) {
        return false;
    }
    return static_cast<bool>(dynamic_state_3_features.extendedDynamicState3RasterizationSamples);
}
/// Returns true when the extendedDynamicState3ColorWriteMask feature of
/// VK_EXT_extended_dynamic_state3 is supported.
bool IsDynamicColorWriteMaskSupported() const {
@ -390,6 +397,7 @@ private:
bool amd_shader_trinary_minmax{};
bool shader_atomic_float2{};
bool workgroup_memory_explicit_layout{};
bool dynamic_rendering_unused_attachments{};
bool portability_subset{};
};

View file

@ -303,7 +303,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
key.clip_space = regs.clipper_control.clip_space;
key.num_samples = regs.NumSamples();
key.num_samples = instance.IsDynamicRasterizationSamplesSupported() ? 1 : regs.NumSamples();
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;

View file

@ -269,6 +269,34 @@ void Rasterizer::EliminateFastClear() {
ScopeMarkerEnd();
}
std::vector<u32> Rasterizer::UniqueSampleCounts() const {
const auto& regs = liverpool->regs;
using Liverpool = AmdGpu::Liverpool;
std::vector<u32> result{};
if (!regs.mode_control.msaa_enable) {
result.push_back(1);
return std::move(result);
}
std::vector<u32> samples{};
if (regs.color_control.mode != Liverpool::ColorControl::OperationMode::Disable) {
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
const auto& col_buf = regs.color_buffers[cb];
if (!col_buf) {
continue;
}
samples.push_back(col_buf.NumSamples());
}
}
if (regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid()) {
samples.push_back(regs.depth_buffer.NumSamples());
}
std::ranges::unique_copy(samples, std::back_inserter(result));
std::ranges::sort(result, std::ranges::greater());
return std::move(result);
}
void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
RENDERER_TRACE;
@ -282,7 +310,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
return;
}
auto state = PrepareRenderState(pipeline->GetMrtMask());
auto full_state = PrepareRenderState(pipeline->GetMrtMask());
if (!BindResources(pipeline)) {
return;
}
@ -291,10 +319,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
if (is_indexed) {
buffer_cache.BindIndexBuffer(index_offset);
}
BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline);
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
const auto& fetch_shader = pipeline->GetFetchShader();
const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader);
@ -302,12 +326,31 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (is_indexed) {
cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0,
s32(vertex_offset), instance_offset);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset,
instance_offset);
for (auto samples : UniqueSampleCounts()) {
auto state = full_state;
auto sample_count =
LiverpoolToVK::NumSamples(samples, instance.GetFramebufferSampleCounts());
for (auto i = 0; i < AmdGpu::Liverpool::NumColorBuffers; ++i) {
if (regs.color_buffers[i] && regs.color_buffers[i].NumSamples() != u32(sample_count)) {
state.color_attachments[i].imageView = VK_NULL_HANDLE;
}
}
if (state.has_depth && regs.depth_buffer.NumSamples() != u32(sample_count)) {
state.depth_attachment.imageView = VK_NULL_HANDLE;
}
if (state.has_stencil && regs.depth_buffer.NumSamples() != u32(sample_count)) {
state.stencil_attachment.imageView = VK_NULL_HANDLE;
}
BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline, sample_count);
if (is_indexed) {
cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0,
s32(vertex_offset), instance_offset);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset,
instance_offset);
}
}
ResetBindings();
@ -321,6 +364,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
return;
}
const auto& regs = liverpool->regs;
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
if (!pipeline) {
return;
@ -346,7 +390,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
}
BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline);
UpdateDynamicState(*pipeline, LiverpoolToVK::NumSamples(regs.NumSamples(),
instance.GetFramebufferSampleCounts()));
// We can safely ignore both SGPR UD indices and results of fetch shader parsing, as vertex and
// instance offsets will be automatically applied by Vulkan from indirect args buffer.
@ -993,7 +1038,8 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
}
}
void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const {
void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline,
vk::SampleCountFlagBits sample_count) const {
UpdateViewportScissorState();
UpdateDepthStencilState();
UpdatePrimitiveState();
@ -1001,6 +1047,7 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const {
auto& dynamic_state = scheduler.GetDynamicState();
dynamic_state.SetBlendConstants(&liverpool->regs.blend_constants.red);
dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
dynamic_state.SetRasterizationSamples(sample_count);
// Commit new dynamic state to the command buffer.
dynamic_state.Commit(instance, scheduler.CommandBuffer());

View file

@ -86,8 +86,10 @@ private:
void Resolve();
void DepthStencilCopy(bool is_depth, bool is_stencil);
void EliminateFastClear();
std::vector<u32> UniqueSampleCounts() const;
void UpdateDynamicState(const GraphicsPipeline& pipeline) const;
void UpdateDynamicState(const GraphicsPipeline& pipeline,
vk::SampleCountFlagBits sample_count) const;
void UpdateViewportScissorState() const;
void UpdateDepthStencilState() const;
void UpdatePrimitiveState() const;

View file

@ -326,6 +326,12 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
cmdbuf.setColorWriteMaskEXT(0, color_write_masks);
}
}
if (dirty_state.rasterization_samples) {
dirty_state.rasterization_samples = false;
if (instance.IsDynamicRasterizationSamplesSupported()) {
cmdbuf.setRasterizationSamplesEXT(rasterization_samples);
}
}
}
} // namespace Vulkan

View file

@ -101,6 +101,7 @@ struct DynamicState {
bool blend_constants : 1;
bool color_write_masks : 1;
bool rasterization_samples : 1;
} dirty_state{};
Viewports viewports{};
@ -135,6 +136,7 @@ struct DynamicState {
float blend_constants[4]{};
ColorWriteMasks color_write_masks{};
vk::SampleCountFlagBits rasterization_samples{};
/// Commits the dynamic state to the provided command buffer.
void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf);
@ -296,6 +298,13 @@ struct DynamicState {
dirty_state.color_write_masks = true;
}
}
/// Stores the requested rasterization sample count and marks it dirty, but only
/// when it differs from the currently stored value.
void SetRasterizationSamples(const vk::SampleCountFlagBits rasterization_samples_) {
    if (rasterization_samples == rasterization_samples_) {
        // Unchanged: leave the dirty flag as it is.
        return;
    }
    rasterization_samples = rasterization_samples_;
    dirty_state.rasterization_samples = true;
}
};
class Scheduler {