video_core: Implement basic compute shaders and more instructions

This commit is contained in:
raphaelthegreat 2024-05-29 01:28:34 +03:00
parent 10bceb1643
commit 58de7ff55a
58 changed files with 1234 additions and 293 deletions

View file

@ -297,6 +297,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format5_6_5 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eB5G6R5UnormPack16;
}
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8Unorm;
}
UNREACHABLE();
}
@ -305,6 +312,10 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
return vk::Format::eD32SfloatS8Uint;
}
if (z_format == DepthBuffer::ZFormat::Z32Float &&
stencil_format == DepthBuffer::StencilFormat::Invalid) {
return vk::Format::eD32Sfloat;
}
UNREACHABLE();
}

View file

@ -0,0 +1,144 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, info{*info_} {
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
.pName = "main",
};
u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
for (const auto& buffer : info.buffers) {
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& image : info.images) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& sampler : info.samplers) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampler,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
.bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(),
};
desc_layout = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
const vk::DescriptorSetLayout set_layout = *desc_layout;
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
.stage = shader_ci,
.layout = *pipeline_layout,
};
auto result =
instance.GetDevice().createComputePipelineUnique(pipeline_cache, compute_pipeline_ci);
if (result.result == vk::Result::eSuccess) {
pipeline = std::move(result.value);
} else {
UNREACHABLE_MSG("Graphics pipeline creation failed!");
}
}
ComputePipeline::~ComputePipeline() = default;
void ComputePipeline::BindResources(Core::MemoryManager* memory,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
u32 binding{};
for (const auto& buffer : info.buffers) {
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const u32 size = vsharp.GetSize();
const VAddr addr = vsharp.base_address.Value();
texture_cache.OnCpuWrite(addr);
const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
buffer_infos.emplace_back(vk_buffer, offset, size);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
}
for (const auto& image : info.images) {
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
const auto& image_view = texture_cache.FindImageView(tsharp);
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
});
}
for (const auto& sampler : info.samplers) {
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
const auto vk_sampler = texture_cache.GetSampler(ssharp);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampler,
.pImageInfo = &image_infos.back(),
});
}
if (!set_writes.empty()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
set_writes);
}
}
} // namespace Vulkan

View file

@ -0,0 +1,45 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace VideoCore {
class TextureCache;
}
namespace Vulkan {
class Instance;
class Scheduler;
class StreamBuffer;
class ComputePipeline {
public:
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, const Shader::Info* info,
vk::ShaderModule module);
~ComputePipeline();
[[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline;
}
void BindResources(Core::MemoryManager* memory, VideoCore::TextureCache& texture_cache) const;
private:
const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout;
Shader::Info info{};
};
} // namespace Vulkan

View file

@ -16,7 +16,8 @@
namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
const PipelineKey& key_, vk::PipelineCache pipeline_cache,
const GraphicsPipelineKey& key_,
vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, scheduler{scheduler_}, key{key_} {
@ -50,7 +51,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.stride = buffer.GetStride(),
.inputRate = vk::VertexInputRate::eVertex,
});
}
@ -275,8 +276,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (i == 0) {
start_offset =
map_staging(buffer.base_address.Value(), buffer.stride * buffer.num_records);
start_offset = map_staging(buffer.base_address.Value(), buffer.GetSize());
base_address = buffer.base_address;
}
buffers[i] = staging.Handle();
@ -297,7 +297,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const u32 size = vsharp.stride * vsharp.num_records;
const u32 size = vsharp.GetSize();
const u32 offset = map_staging(vsharp.base_address.Value(), size);
buffer_infos.emplace_back(staging.Handle(), offset, size);
set_writes.push_back({

View file

@ -26,7 +26,7 @@ class StreamBuffer;
using Liverpool = AmdGpu::Liverpool;
struct PipelineKey {
struct GraphicsPipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
@ -40,16 +40,16 @@ struct PipelineKey {
Liverpool::CullMode cull_mode;
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
bool operator==(const GraphicsPipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0;
}
};
static_assert(std::has_unique_object_representations_v<PipelineKey>);
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
const PipelineKey& key, vk::PipelineCache pipeline_cache,
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline();
@ -76,14 +76,14 @@ private:
vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout;
std::array<Shader::Info, MaxShaderStages> stages{};
PipelineKey key;
GraphicsPipelineKey key;
};
} // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
struct std::hash<Vulkan::GraphicsPipelineKey> {
std::size_t operator()(const Vulkan::GraphicsPipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};

View file

@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
.timelineSemaphore = true,
},
vk::PhysicalDeviceVulkan13Features{
.shaderDemoteToHelperInvocation = true,
.dynamicRendering = true,
.maintenance4 = true,
},

View file

@ -21,7 +21,12 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
info.user_data = user_data;
info.stage = stage;
switch (stage) {
case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs;
break;
}
case Shader::Stage::Fragment: {
info.num_user_data = regs.ps_program.settings.num_user_regs;
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
@ -32,6 +37,13 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
}
break;
}
case Shader::Stage::Compute: {
const auto& cs_pgm = regs.cs_program;
info.num_user_data = cs_pgm.settings.num_user_regs;
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full};
break;
}
default:
break;
}
@ -48,17 +60,30 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
};
}
const GraphicsPipeline* PipelineCache::GetPipeline() {
RefreshKey();
const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
RefreshGraphicsKey();
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) {
it.value() = CreatePipeline();
it.value() = CreateGraphicsPipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
void PipelineCache::RefreshKey() {
const ComputePipeline* PipelineCache::GetComputePipeline() {
const auto& cs_pgm = liverpool->regs.cs_program;
ASSERT(cs_pgm.Address() != nullptr);
const auto code = cs_pgm.Code();
compute_key = XXH3_64bits(code.data(), code.size_bytes());
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
if (is_new) {
it.value() = CreateComputePipeline();
}
const ComputePipeline* pipeline = it->second.get();
return pipeline;
}
void PipelineCache::RefreshGraphicsKey() {
auto& regs = liverpool->regs;
auto& key = graphics_key;
@ -92,7 +117,7 @@ void PipelineCache::RefreshKey() {
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
const auto& regs = liverpool->regs;
u32 binding{};
@ -141,6 +166,36 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
infos, stages);
}
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
const auto& cs_pgm = liverpool->regs.cs_program;
const auto code = cs_pgm.Code();
// Dump shader code if requested.
if (Config::dumpShaders()) {
DumpShader(code, compute_key, Shader::Stage::Compute, "bin");
}
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
// Recompile shader to IR.
const Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V
u32 binding{};
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
const auto module = CompileSPV(spv_code, instance.GetDevice());
if (Config::dumpShaders()) {
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
}
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, &program.info,
module);
}
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
std::string_view ext) {
using namespace Common::FS;

View file

@ -7,6 +7,7 @@
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
@ -26,15 +27,17 @@ public:
AmdGpu::Liverpool* liverpool);
~PipelineCache() = default;
const GraphicsPipeline* GetPipeline();
const GraphicsPipeline* GetGraphicsPipeline();
const ComputePipeline* GetComputePipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
void RefreshGraphicsKey();
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, std::string_view ext);
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
std::unique_ptr<ComputePipeline> CreateComputePipeline();
private:
const Instance& instance;
Scheduler& scheduler;
@ -43,9 +46,11 @@ private:
vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{};
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
Shader::Profile profile{};
PipelineKey graphics_key{};
GraphicsPipelineKey graphics_key{};
u64 compute_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool;
};

View file

@ -36,7 +36,7 @@ void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer();
const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
@ -49,8 +49,13 @@ void Rasterizer::Draw(bool is_indexed) {
};
// TODO: Don't restart renderpass every draw
const auto& scissor = regs.screen_scissor;
const vk::RenderingInfo rendering_info = {
.renderArea = {.offset = {0, 0}, .extent = {1920, 1080}},
.renderArea =
{
.offset = {scissor.top_left_x, scissor.top_left_y},
.extent = {scissor.GetWidth(), scissor.GetHeight()},
},
.layerCount = 1,
.colorAttachmentCount = 1,
.pColorAttachments = &color_info,
@ -69,6 +74,17 @@ void Rasterizer::Draw(bool is_indexed) {
cmdbuf.endRendering();
}
void Rasterizer::DispatchDirect() {
return;
const auto cmdbuf = scheduler.CommandBuffer();
const auto& cs_program = liverpool->regs.cs_program;
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
pipeline->BindResources(memory, texture_cache);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
}
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs;

View file

@ -31,6 +31,8 @@ public:
void Draw(bool is_indexed);
void DispatchDirect();
private:
u32 SetupIndexBuffer(bool& is_indexed);
void MapMemory(VAddr addr, size_t size);

View file

@ -55,7 +55,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.compositeAlpha = composite_alpha,
.presentMode = vk::PresentModeKHR::eMailbox,
.presentMode = vk::PresentModeKHR::eFifo,
.clipped = true,
.oldSwapchain = nullptr,
};