From 7b8177f48e3d5dfa12b2d416fdf291572e56b905 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sat, 18 Jan 2025 09:20:38 +0300 Subject: [PATCH] renderer: handle disabled clipping (#2146) Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> --- .../backend/spirv/emit_spirv_special.cpp | 38 +++++++++++ .../backend/spirv/spirv_emit_context.cpp | 37 ++++++---- src/shader_recompiler/info.h | 8 +++ src/shader_recompiler/profile.h | 2 + src/shader_recompiler/runtime_info.h | 4 +- .../renderer_vulkan/vk_graphics_pipeline.h | 7 +- .../renderer_vulkan/vk_instance.cpp | 6 ++ src/video_core/renderer_vulkan/vk_instance.h | 8 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 5 ++ .../renderer_vulkan/vk_rasterizer.cpp | 67 ++++++++++++++----- .../renderer_vulkan/vk_rasterizer.h | 2 +- 11 files changed, 149 insertions(+), 35 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 4a22ba09f..a0a3ed8ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -24,10 +24,48 @@ void ConvertDepthMode(EmitContext& ctx) { ctx.OpStore(ctx.output_position, vector); } +void ConvertPositionToClipSpace(EmitContext& ctx) { + const Id type{ctx.F32[1]}; + Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id x{ctx.OpCompositeExtract(type, position, 0u)}; + const Id y{ctx.OpCompositeExtract(type, position, 1u)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id xoffset_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, type), + ctx.push_data_block, + ctx.ConstU32(PushData::XOffsetIndex))}; + const Id xoffset{ctx.OpLoad(type, xoffset_ptr)}; + const Id yoffset_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, type), + ctx.push_data_block, + ctx.ConstU32(PushData::YOffsetIndex))}; + const Id yoffset{ctx.OpLoad(type, yoffset_ptr)}; + const Id xscale_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, type), + ctx.push_data_block, + ctx.ConstU32(PushData::XScaleIndex))}; + const Id xscale{ctx.OpLoad(type, xscale_ptr)}; + const Id yscale_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, type), + ctx.push_data_block, + ctx.ConstU32(PushData::YScaleIndex))}; + const Id yscale{ctx.OpLoad(type, yscale_ptr)}; + const Id vport_w = + ctx.Constant(type, float(std::min(ctx.profile.max_viewport_width / 2, 8_KB))); + const Id wnd_x = ctx.OpFAdd(type, ctx.OpFMul(type, x, xscale), xoffset); + const Id ndc_x = ctx.OpFSub(type, ctx.OpFDiv(type, wnd_x, vport_w), ctx.Constant(type, 1.f)); + const Id vport_h = + ctx.Constant(type, float(std::min(ctx.profile.max_viewport_height / 2, 8_KB))); + const Id wnd_y = ctx.OpFAdd(type, ctx.OpFMul(type, y, yscale), yoffset); + const Id ndc_y = ctx.OpFSub(type, ctx.OpFDiv(type, wnd_y, vport_h), ctx.Constant(type, 1.f)); + const Id vector{ctx.OpCompositeConstruct(ctx.F32[4], std::array({ndc_x, ndc_y, z, w}))}; + ctx.OpStore(ctx.output_position, vector); +} + void EmitEpilogue(EmitContext& ctx) { if (ctx.stage == Stage::Vertex && ctx.runtime_info.vs_info.emulate_depth_negative_one_to_one) { ConvertDepthMode(ctx); } + if (ctx.stage == Stage::Vertex && ctx.runtime_info.vs_info.clip_disable) { + ConvertPositionToClipSpace(ctx); + } } void EmitDiscard(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 295bef75d..b0bf5aa0a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -568,25 +568,34 @@ void EmitContext::DefineOutputs() { void EmitContext::DefinePushDataBlock() { // Create push constants block for instance steps rates - const Id struct_type{Name( - TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4]), "AuxData")}; + const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4], U32[4], + U32[4], F32[1], F32[1], F32[1], F32[1]), + "AuxData")}; Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "sr0"); MemberName(struct_type, 1, "sr1"); - MemberName(struct_type, 2, "buf_offsets0"); - MemberName(struct_type, 3, "buf_offsets1"); - MemberName(struct_type, 4, "ud_regs0"); - MemberName(struct_type, 5, "ud_regs1"); - MemberName(struct_type, 6, "ud_regs2"); - MemberName(struct_type, 7, "ud_regs3"); + MemberName(struct_type, Shader::PushData::BufOffsetIndex + 0, "buf_offsets0"); + MemberName(struct_type, Shader::PushData::BufOffsetIndex + 1, "buf_offsets1"); + MemberName(struct_type, Shader::PushData::UdRegsIndex + 0, "ud_regs0"); + MemberName(struct_type, Shader::PushData::UdRegsIndex + 1, "ud_regs1"); + MemberName(struct_type, Shader::PushData::UdRegsIndex + 2, "ud_regs2"); + MemberName(struct_type, Shader::PushData::UdRegsIndex + 3, "ud_regs3"); + MemberName(struct_type, Shader::PushData::XOffsetIndex, "xoffset"); + MemberName(struct_type, Shader::PushData::YOffsetIndex, "yoffset"); + MemberName(struct_type, Shader::PushData::XScaleIndex, "xscale"); + MemberName(struct_type, Shader::PushData::YScaleIndex, "yscale"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); - MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U); - MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U); - MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U); - MemberDecorate(struct_type, 5, spv::Decoration::Offset, 56U); - MemberDecorate(struct_type, 6, spv::Decoration::Offset, 72U); - MemberDecorate(struct_type, 7, spv::Decoration::Offset, 88U); + MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 8U); + MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 24U); + MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 0, spv::Decoration::Offset, 40U); + MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 1, spv::Decoration::Offset, 56U); + MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 2, spv::Decoration::Offset, 72U); + MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 3, spv::Decoration::Offset, 88U); + MemberDecorate(struct_type, Shader::PushData::XOffsetIndex, spv::Decoration::Offset, 104U); + MemberDecorate(struct_type, Shader::PushData::YOffsetIndex, spv::Decoration::Offset, 108U); + MemberDecorate(struct_type, Shader::PushData::XScaleIndex, spv::Decoration::Offset, 112U); + MemberDecorate(struct_type, Shader::PushData::YScaleIndex, spv::Decoration::Offset, 116U); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); Name(push_data_block, "push_data"); interfaces.push_back(push_data_block); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index aeff346fa..2cde30629 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -96,11 +96,19 @@ using FMaskResourceList = boost::container::small_vector; struct PushData { static constexpr u32 BufOffsetIndex = 2; static constexpr u32 UdRegsIndex = 4; + static constexpr u32 XOffsetIndex = 8; + static constexpr u32 YOffsetIndex = 9; + static constexpr u32 XScaleIndex = 10; + static constexpr u32 YScaleIndex = 11; u32 step0; u32 step1; std::array buf_offsets; std::array ud_regs; + float xoffset; + float yoffset; + float xscale; + float yscale; void AddOffset(u32 binding, u32 offset) { ASSERT(offset < 256 && binding < buf_offsets.size()); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f8878d442..f8b91a283 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -30,6 +30,8 @@ struct Profile { bool needs_manual_interpolation{}; bool needs_lds_barriers{}; u64 min_ssbo_alignment{}; + u32 max_viewport_width{}; + u32 max_viewport_height{}; }; } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 8a5e0a1a6..2bf5e3f0a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -84,6 +84,7 @@ struct VertexRuntimeInfo { u32 num_outputs; std::array outputs; bool emulate_depth_negative_one_to_one{}; + bool clip_disable{}; // Domain AmdGpu::TessellationType tess_type; AmdGpu::TessellationTopology tess_topology; @@ -92,7 +93,8 @@ struct VertexRuntimeInfo { bool operator==(const VertexRuntimeInfo& other) const noexcept { return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && - tess_type == other.tess_type && tess_topology == other.tess_topology && + clip_disable == other.clip_disable && tess_type == other.tess_type && + tess_topology == other.tess_topology && tess_partitioning == other.tess_partitioning && hs_output_cp_stride == other.hs_output_cp_stride; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index f696c1f72..9a20f94c1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -42,13 +42,14 @@ struct GraphicsPipelineKey { vk::Format stencil_format; struct { + bool clip_disable : 1; bool depth_test_enable : 1; bool depth_write_enable : 1; bool depth_bounds_test_enable : 1; bool depth_bias_enable : 1; bool stencil_test_enable : 1; // Must be named to be zero-initialized. - u8 _unused : 3; + u8 _unused : 2; }; vk::CompareOp depth_compare_op; @@ -94,6 +95,10 @@ public: return key.mrt_mask; } + auto IsClipDisabled() const { + return key.clip_disable; + } + [[nodiscard]] bool IsPrimitiveListTopology() const { return key.prim_type == AmdGpu::PrimitiveType::PointList || key.prim_type == AmdGpu::PrimitiveType::LineList || diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 1600600b9..d9577a612 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -208,6 +208,7 @@ std::string Instance::GetDriverVersionName() { bool Instance::CreateDevice() { const vk::StructureChain feature_chain = physical_device.getFeatures2< vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, + vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, vk::PhysicalDeviceCustomBorderColorFeaturesEXT, @@ -317,6 +318,9 @@ bool Instance::CreateDevice() { .pQueuePriorities = queue_priorities.data(), }; + const auto topology_list_restart_features = + feature_chain.get(); + const auto vk12_features = feature_chain.get(); vk::StructureChain device_chain = { vk::DeviceCreateInfo{ @@ -406,6 +410,8 @@ bool Instance::CreateDevice() { }, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{ .primitiveTopologyListRestart = true, + .primitiveTopologyPatchListRestart = + topology_list_restart_features.primitiveTopologyPatchListRestart, }, vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{ .fragmentShaderBarycentric = true, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 1b0c276dc..8c4752c3f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -279,6 +279,14 @@ public: return min_imported_host_pointer_alignment; } + u32 GetMaxViewportWidth() const { + return properties.limits.maxViewportDimensions[0]; + } + + u32 GetMaxViewportHeight() const { + return properties.limits.maxViewportDimensions[1]; + } + /// Returns the sample count flags supported by framebuffers. vk::SampleCountFlags GetFramebufferSampleCounts() const { return properties.limits.framebufferColorSampleCounts & diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 86f6dcc7a..c6a56745d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -125,6 +125,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + info.vs_info.clip_disable = graphics_key.clip_disable; if (l_stage == LogicalStage::TessellationEval) { info.vs_info.tess_type = regs.tess_config.type; info.vs_info.tess_topology = regs.tess_config.topology; @@ -210,6 +211,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || instance.GetDriverID() == vk::DriverId::eMoltenvk, + .max_viewport_width = instance.GetMaxViewportWidth(), + .max_viewport_height = instance.GetMaxViewportHeight(), }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", @@ -262,6 +265,8 @@ bool PipelineCache::RefreshGraphicsKey() { auto& regs = liverpool->regs; auto& key = graphics_key; + key.clip_disable = + regs.clipper_control.clip_disable || regs.primitive_type == AmdGpu::PrimitiveType::RectList; key.depth_test_enable = regs.depth_control.depth_enable; key.depth_write_enable = regs.depth_control.depth_write_enable && !regs.depth_render_control.depth_clear_enable; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bac647125..07369c620 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -504,6 +504,17 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { } push_data.step0 = regs.vgt_instance_step_rate_0; push_data.step1 = regs.vgt_instance_step_rate_1; + + // TODO(roamic): add support for multiple viewports and geometry shaders when ViewportIndex + // is encountered and implemented in the recompiler. + if (stage->stage == Shader::Stage::Vertex) { + push_data.xoffset = + regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f; + push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f; + push_data.yoffset = + regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f; + push_data.yscale = regs.viewport_control.yscale_enable ? regs.viewports[0].yscale : 1.f; + } stage->PushUd(binding, push_data); BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers); @@ -1032,7 +1043,7 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) { } void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { - UpdateViewportScissorState(); + UpdateViewportScissorState(pipeline); auto& regs = liverpool->regs; const auto cmdbuf = scheduler.CommandBuffer(); @@ -1112,7 +1123,7 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { } } -void Rasterizer::UpdateViewportScissorState() { +void Rasterizer::UpdateViewportScissorState(const GraphicsPipeline& pipeline) { const auto& regs = liverpool->regs; const auto combined_scissor_value_tl = [](s16 scr, s16 win, s16 gen, s16 win_offset) { @@ -1151,26 +1162,46 @@ void Rasterizer::UpdateViewportScissorState() { ? 1.0f : 0.0f; + if (regs.polygon_control.enable_window_offset) { + LOG_ERROR(Render_Vulkan, + "PA_SU_SC_MODE_CNTL.VTX_WINDOW_OFFSET_ENABLE support is not yet implemented."); + } + for (u32 i = 0; i < Liverpool::NumViewports; i++) { const auto& vp = regs.viewports[i]; const auto& vp_d = regs.viewport_depths[i]; if (vp.xscale == 0) { continue; } - const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f; - const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f; - const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f; - const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f; - const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f; - const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f; - viewports.push_back({ - .x = xoffset - xscale, - .y = yoffset - yscale, - .width = xscale * 2.0f, - .height = yscale * 2.0f, - .minDepth = zoffset - zscale * reduce_z, - .maxDepth = zscale + zoffset, - }); + + if (pipeline.IsClipDisabled()) { + // In case if clipping is disabled we patch the shader to convert vertex position + // from screen space coordinates to NDC by defining a render space as full hardware + // window range [0..16383, 0..16383] and setting the viewport to its size. + viewports.push_back({ + .x = 0.f, + .y = 0.f, + .width = float(std::min(instance.GetMaxViewportWidth(), 16_KB)), + .height = float(std::min(instance.GetMaxViewportHeight(), 16_KB)), + .minDepth = 0.0, + .maxDepth = 1.0, + }); + } else { + const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f; + const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f; + const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f; + const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f; + const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f; + const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f; + viewports.push_back({ + .x = xoffset - xscale, + .y = yoffset - yscale, + .width = xscale * 2.0f, + .height = yscale * 2.0f, + .minDepth = zoffset - zscale * reduce_z, + .maxDepth = zscale + zoffset, + }); + } auto vp_scsr = scsr; if (regs.mode_control.vport_scissor_enable) { @@ -1192,8 +1223,8 @@ void Rasterizer::UpdateViewportScissorState() { if (viewports.empty()) { // Vulkan requires providing at least one viewport. constexpr vk::Viewport empty_viewport = { - .x = 0.0f, - .y = 0.0f, + .x = -1.0f, + .y = -1.0f, .width = 1.0f, .height = 1.0f, .minDepth = 0.0f, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index abf58e522..ed6cc7e71 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -76,7 +76,7 @@ private: void EliminateFastClear(); void UpdateDynamicState(const GraphicsPipeline& pipeline); - void UpdateViewportScissorState(); + void UpdateViewportScissorState(const GraphicsPipeline& pipeline); bool FilterDraw();