mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-10 20:53:15 +00:00
renderer_vulkan: Implement rectlist emulation with tessellation (#1857)
* renderer_vulkan: Implement rectlist emulation with tessellation
* clang format
* renderer_vulkan: Use tessellation for quad primitive as well
* vk_rasterizer: Handle viewport enable flags
* review
* shader_recompiler: Fix quad/rect list FS passthrough semantics.
* spirv: Bump to 1.5
* remove pragma

---------

Co-authored-by: squidbus <175574877+squidbus@users.noreply.github.com>
This commit is contained in:
parent
c2e9c877dd
commit
092d42e981
15 changed files with 426 additions and 123 deletions
|
@ -116,12 +116,12 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
|
|||
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
|
||||
case AmdGpu::PrimitiveType::PatchPrimitive:
|
||||
return vk::PrimitiveTopology::ePatchList;
|
||||
case AmdGpu::PrimitiveType::QuadList:
|
||||
case AmdGpu::PrimitiveType::Polygon:
|
||||
// Needs to generate index buffer on the fly.
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case AmdGpu::PrimitiveType::QuadList:
|
||||
case AmdGpu::PrimitiveType::RectList:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
return vk::PrimitiveTopology::ePatchList;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
|
|
|
@ -70,34 +70,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color
|
|||
|
||||
vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);
|
||||
|
||||
static constexpr u16 NumVerticesPerQuad = 4;
|
||||
|
||||
/// Writes a 16-bit triangle-list index buffer for a non-indexed quad list.
///
/// Every group of NumVerticesPerQuad consecutive vertices (v0..v3) is emitted as the two
/// triangles (v0, v1, v2) and (v0, v2, v3), i.e. six indices per quad.
///
/// @param out_ptr      Destination buffer, reinterpreted as u16. The caller must provide room
///                     for six u16 indices per complete quad.
/// @param num_vertices Number of source vertices. A trailing group of fewer than
///                     NumVerticesPerQuad vertices does not form a quad and is ignored.
///
/// @note Indices are stored as u16, so vertex numbers above 0xFFFF are truncated on write.
inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    // Count quads with a 32-bit counter. A u16 counter wraps back to 0 before it can reach a
    // num_vertices above 65535, so the loop would never terminate and would keep writing past
    // the end of the output buffer.
    const u32 num_quads = num_vertices / NumVerticesPerQuad;
    for (u32 quad = 0; quad < num_quads; ++quad) {
        const u32 base = quad * NumVerticesPerQuad;
        // First triangle: v0, v1, v2.
        *out_data++ = static_cast<u16>(base);
        *out_data++ = static_cast<u16>(base + 1);
        *out_data++ = static_cast<u16>(base + 2);
        // Second triangle: v0, v2, v3.
        *out_data++ = static_cast<u16>(base);
        *out_data++ = static_cast<u16>(base + 2);
        *out_data++ = static_cast<u16>(base + 3);
    }
}
|
||||
|
||||
template <typename T>
|
||||
void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) {
|
||||
T* out_data = reinterpret_cast<T*>(out_ptr);
|
||||
const T* in_data = reinterpret_cast<const T*>(in_ptr);
|
||||
for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
|
||||
*out_data++ = in_data[i];
|
||||
*out_data++ = in_data[i + 1];
|
||||
*out_data++ = in_data[i + 2];
|
||||
*out_data++ = in_data[i];
|
||||
*out_data++ = in_data[i + 2];
|
||||
*out_data++ = in_data[i + 3];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
|
||||
u16* out_data = reinterpret_cast<u16*>(out_ptr);
|
||||
for (u16 i = 1; i < num_vertices - 1; i++) {
|
||||
|
|
|
@ -7,25 +7,30 @@
|
|||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
|
||||
DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
|
||||
vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> infos,
|
||||
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
|
||||
std::span<const vk::ShaderModule> modules)
|
||||
using Shader::Backend::SPIRV::AuxShaderType;
|
||||
|
||||
GraphicsPipeline::GraphicsPipeline(
|
||||
const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
|
||||
const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> infos,
|
||||
std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
|
||||
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
|
||||
std::span<const vk::ShaderModule> modules)
|
||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
|
||||
fetch_shader{std::move(fetch_shader_)} {
|
||||
const vk::Device device = instance.GetDevice();
|
||||
|
@ -88,11 +93,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.pVertexAttributeDescriptions = vertex_attributes.data(),
|
||||
};
|
||||
|
||||
if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Rectangle List primitive type is only supported for embedded VS");
|
||||
}
|
||||
|
||||
auto prim_restart = key.enable_primitive_restart != 0;
|
||||
if (prim_restart && IsPrimitiveListTopology() && !instance.IsListRestartSupported()) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
|
@ -106,9 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
ASSERT_MSG(!prim_restart || key.primitive_restart_index == 0xFFFF ||
|
||||
key.primitive_restart_index == 0xFFFFFFFF,
|
||||
"Primitive restart index other than -1 is not supported yet");
|
||||
|
||||
const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList;
|
||||
const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList;
|
||||
const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
|
||||
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
|
||||
.patchControlPoints = key.patch_control_points,
|
||||
.patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
|
||||
};
|
||||
|
||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
||||
|
@ -232,6 +234,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
} else if (is_rect_list || is_quad_list) {
|
||||
const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS;
|
||||
auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info);
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eTessellationControl,
|
||||
.module = CompileSPV(tcs, instance.GetDevice()),
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::LogicalStage::TessellationEval);
|
||||
if (infos[stage]) {
|
||||
|
@ -240,6 +250,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
} else if (is_rect_list || is_quad_list) {
|
||||
auto tes =
|
||||
Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, fs_info);
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
|
||||
.module = CompileSPV(tes, instance.GetDevice()),
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::LogicalStage::Fragment);
|
||||
if (infos[stage]) {
|
||||
|
@ -322,8 +340,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pTessellationState =
|
||||
stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
|
||||
.pTessellationState = &tessellation_state,
|
||||
.pViewportState = &viewport_info,
|
||||
.pRasterizationState = &raster_state,
|
||||
.pMultisampleState = &multisampling,
|
||||
|
|
|
@ -18,7 +18,7 @@ class TextureCache;
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr u32 MaxShaderStages = 5;
|
||||
static constexpr u32 MaxShaderStages = static_cast<u32>(Shader::LogicalStage::NumLogicalStages);
|
||||
static constexpr u32 MaxVertexBufferCount = 32;
|
||||
|
||||
class Instance;
|
||||
|
@ -64,6 +64,7 @@ public:
|
|||
GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
|
||||
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> stages,
|
||||
std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
|
||||
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
|
||||
std::span<const vk::ShaderModule> modules);
|
||||
~GraphicsPipeline();
|
||||
|
@ -72,11 +73,6 @@ public:
|
|||
return fetch_shader;
|
||||
}
|
||||
|
||||
bool IsEmbeddedVs() const noexcept {
|
||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
|
||||
}
|
||||
|
||||
/// Returns a copy of the write_masks field of this pipeline's key.
auto GetWriteMasks() const {
|
||||
return key.write_masks;
|
||||
}
|
||||
|
|
|
@ -80,8 +80,8 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
|||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
||||
auto info = Shader::RuntimeInfo{stage};
|
||||
const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
||||
auto& info = runtime_infos[u32(l_stage)];
|
||||
const auto& regs = liverpool->regs;
|
||||
const auto BuildCommon = [&](const auto& program) {
|
||||
info.num_user_data = program.settings.num_user_regs;
|
||||
|
@ -90,6 +90,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
|
|||
info.fp_denorm_mode32 = program.settings.fp_denorm_mode32;
|
||||
info.fp_round_mode32 = program.settings.fp_round_mode32;
|
||||
};
|
||||
info.Initialize(stage);
|
||||
switch (stage) {
|
||||
case Stage::Local: {
|
||||
BuildCommon(regs.ls_program);
|
||||
|
@ -220,9 +221,9 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
|||
}
|
||||
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
|
||||
if (is_new) {
|
||||
it.value() =
|
||||
std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, graphics_key,
|
||||
*pipeline_cache, infos, fetch_shader, modules);
|
||||
it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap,
|
||||
graphics_key, *pipeline_cache, infos,
|
||||
runtime_infos, fetch_shader, modules);
|
||||
if (Config::collectShadersForDebug()) {
|
||||
for (auto stage = 0; stage < MaxShaderStages; ++stage) {
|
||||
if (infos[stage]) {
|
||||
|
|
|
@ -76,7 +76,7 @@ private:
|
|||
vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
|
||||
std::span<const u32> code, size_t perm_idx,
|
||||
Shader::Backend::Bindings& binding);
|
||||
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
|
||||
const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
@ -90,6 +90,7 @@ private:
|
|||
tsl::robin_map<size_t, std::unique_ptr<Program>> program_cache;
|
||||
tsl::robin_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_pipelines;
|
||||
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||
std::array<Shader::RuntimeInfo, MaxShaderStages> runtime_infos{};
|
||||
std::array<const Shader::Info*, MaxShaderStages> infos{};
|
||||
std::array<vk::ShaderModule, MaxShaderStages> modules{};
|
||||
std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
|
||||
|
|
|
@ -245,7 +245,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
}
|
||||
|
||||
auto state = PrepareRenderState(pipeline->GetMrtMask());
|
||||
|
||||
if (!BindResources(pipeline)) {
|
||||
return;
|
||||
}
|
||||
|
@ -267,10 +266,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
|
||||
instance_offset);
|
||||
} else {
|
||||
const u32 num_vertices =
|
||||
regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices;
|
||||
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
|
||||
instance_offset);
|
||||
cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset);
|
||||
}
|
||||
|
||||
ResetBindings();
|
||||
|
@ -285,18 +281,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
|||
}
|
||||
|
||||
const auto& regs = liverpool->regs;
|
||||
if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList ||
|
||||
regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
|
||||
// We use a generated index buffer to convert quad lists and polygons to triangles. Since it
|
||||
if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
|
||||
// We use a generated index buffer to convert polygons to triangles. Since it
|
||||
// changes type of the draw, arguments are not valid for this case. We need to run a
|
||||
// conversion pass to repack the indirect arguments buffer first.
|
||||
LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw");
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
|
||||
"Unsupported primitive type for indirect draw");
|
||||
|
||||
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
|
||||
if (!pipeline) {
|
||||
return;
|
||||
|
@ -1009,19 +1001,26 @@ void Rasterizer::UpdateViewportScissorState() {
|
|||
regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW
|
||||
? 1.0f
|
||||
: 0.0f;
|
||||
const auto vp_ctl = regs.viewport_control;
|
||||
for (u32 i = 0; i < Liverpool::NumViewports; i++) {
|
||||
const auto& vp = regs.viewports[i];
|
||||
const auto& vp_d = regs.viewport_depths[i];
|
||||
if (vp.xscale == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f;
|
||||
const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f;
|
||||
const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f;
|
||||
const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f;
|
||||
const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f;
|
||||
const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f;
|
||||
viewports.push_back({
|
||||
.x = vp.xoffset - vp.xscale,
|
||||
.y = vp.yoffset - vp.yscale,
|
||||
.width = vp.xscale * 2.0f,
|
||||
.height = vp.yscale * 2.0f,
|
||||
.minDepth = vp.zoffset - vp.zscale * reduce_z,
|
||||
.maxDepth = vp.zscale + vp.zoffset,
|
||||
.x = xoffset - xscale,
|
||||
.y = yoffset - yscale,
|
||||
.width = xscale * 2.0f,
|
||||
.height = yscale * 2.0f,
|
||||
.minDepth = zoffset - zscale * reduce_z,
|
||||
.maxDepth = zscale + zoffset,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -126,6 +126,10 @@ EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
|
|||
return EShLanguage::EShLangVertex;
|
||||
case vk::ShaderStageFlagBits::eGeometry:
|
||||
return EShLanguage::EShLangGeometry;
|
||||
case vk::ShaderStageFlagBits::eTessellationControl:
|
||||
return EShLanguage::EShLangTessControl;
|
||||
case vk::ShaderStageFlagBits::eTessellationEvaluation:
|
||||
return EShLanguage::EShLangTessEvaluation;
|
||||
case vk::ShaderStageFlagBits::eFragment:
|
||||
return EShLanguage::EShLangFragment;
|
||||
case vk::ShaderStageFlagBits::eCompute:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue