mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-31 15:53:17 +00:00
video_core: Implement basic compute shaders and more instructions
This commit is contained in:
parent
10bceb1643
commit
58de7ff55a
58 changed files with 1234 additions and 293 deletions
|
@ -206,7 +206,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
// const auto* dispatch_direct = reinterpret_cast<PM4CmdDispatchDirect*>(header);
|
||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
rasterizer->DispatchDirect();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
|
|
|
@ -48,9 +48,28 @@ struct Liverpool {
|
|||
|
||||
using UserData = std::array<u32, NumShaderUserData>;
|
||||
|
||||
struct BinaryInfo {
|
||||
u8 signature[7];
|
||||
u8 version;
|
||||
u32 pssl_or_cg : 1;
|
||||
u32 cached : 1;
|
||||
u32 type : 4;
|
||||
u32 source_type : 2;
|
||||
u32 length : 24;
|
||||
u8 chunk_usage_base_offset_in_dw;
|
||||
u8 num_input_usage_slots;
|
||||
u8 is_srt : 1;
|
||||
u8 is_srt_used_info_valid : 1;
|
||||
u8 is_extended_usage_info : 1;
|
||||
u8 reserved2 : 5;
|
||||
u8 reserved3;
|
||||
u64 shader_hash;
|
||||
u32 crc32;
|
||||
};
|
||||
|
||||
struct ShaderProgram {
|
||||
u32 address_lo;
|
||||
u32 address_hi;
|
||||
BitField<0, 8, u32> address_hi;
|
||||
union {
|
||||
BitField<0, 6, u64> num_vgprs;
|
||||
BitField<6, 4, u64> num_sgprs;
|
||||
|
@ -65,13 +84,53 @@ struct Liverpool {
|
|||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
u32 code_size = 0;
|
||||
const u32* code = Address<u32>();
|
||||
static constexpr std::string_view PostHeader = "OrbShdr";
|
||||
while (std::memcmp(code + code_size, PostHeader.data(), PostHeader.size()) != 0) {
|
||||
code_size++;
|
||||
}
|
||||
return std::span{code, code_size};
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
return std::span{code, num_dwords};
|
||||
}
|
||||
};
|
||||
|
||||
struct ComputeProgram {
|
||||
u32 dispatch_initiator;
|
||||
u32 dim_x;
|
||||
u32 dim_y;
|
||||
u32 dim_z;
|
||||
u32 start_x;
|
||||
u32 start_y;
|
||||
u32 start_z;
|
||||
struct {
|
||||
u16 full;
|
||||
u16 partial;
|
||||
} num_thread_x, num_thread_y, num_thread_z;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
BitField<0, 12, u32> max_wave_id;
|
||||
u32 address_lo;
|
||||
BitField<0, 8, u32> address_hi;
|
||||
INSERT_PADDING_WORDS(4);
|
||||
union {
|
||||
BitField<0, 6, u64> num_vgprs;
|
||||
BitField<6, 4, u64> num_sgprs;
|
||||
BitField<33, 5, u64> num_user_regs;
|
||||
} settings;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 resource_limits;
|
||||
INSERT_PADDING_WORDS(0x2A);
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
return std::span{code, num_dwords};
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -621,7 +680,9 @@ struct Liverpool {
|
|||
ShaderProgram ps_program;
|
||||
INSERT_PADDING_WORDS(0x2C);
|
||||
ShaderProgram vs_program;
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2C4C - 16);
|
||||
INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16);
|
||||
ComputeProgram cs_program;
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80);
|
||||
u32 depth_bounds_min;
|
||||
u32 depth_bounds_max;
|
||||
u32 stencil_clear;
|
||||
|
@ -777,6 +838,10 @@ private:
|
|||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
|
||||
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
|
||||
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
|
||||
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
|
||||
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
||||
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
|
||||
|
|
|
@ -540,4 +540,12 @@ struct PM4DumpConstRam {
|
|||
}
|
||||
};
|
||||
|
||||
struct PM4CmdDispatchDirect {
|
||||
PM4Type3Header header;
|
||||
u32 dim_x; ///< X dimensions of the array of thread groups to be dispatched
|
||||
u32 dim_y; ///< Y dimensions of the array of thread groups to be dispatched
|
||||
u32 dim_z; ///< Z dimensions of the array of thread groups to be dispatched
|
||||
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
@ -29,6 +30,22 @@ struct Buffer {
|
|||
BitField<21, 2, u32> index_stride;
|
||||
BitField<23, 1, u32> add_tid_enable;
|
||||
};
|
||||
|
||||
u32 GetStride() const noexcept {
|
||||
return stride == 0 ? 1U : stride;
|
||||
}
|
||||
|
||||
u32 GetStrideElements(u32 element_size) const noexcept {
|
||||
if (stride == 0) {
|
||||
return 1U;
|
||||
}
|
||||
ASSERT(stride % element_size == 0);
|
||||
return stride / element_size;
|
||||
}
|
||||
|
||||
u32 GetSize() const noexcept {
|
||||
return GetStride() * num_records;
|
||||
}
|
||||
};
|
||||
|
||||
enum class ImageType : u64 {
|
||||
|
@ -70,7 +87,7 @@ constexpr std::string_view NameOf(ImageType type) {
|
|||
|
||||
struct Image {
|
||||
union {
|
||||
BitField<0, 40, u64> base_address;
|
||||
BitField<0, 38, u64> base_address;
|
||||
BitField<40, 12, u64> min_lod;
|
||||
BitField<52, 6, u64> data_format;
|
||||
BitField<58, 4, u64> num_format;
|
||||
|
|
|
@ -297,6 +297,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR32G32Sfloat;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format5_6_5 &&
|
||||
num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eB5G6R5UnormPack16;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eR8Unorm;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
@ -305,6 +312,10 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
|
|||
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
|
||||
return vk::Format::eD32SfloatS8Uint;
|
||||
}
|
||||
if (z_format == DepthBuffer::ZFormat::Z32Float &&
|
||||
stencil_format == DepthBuffer::StencilFormat::Invalid) {
|
||||
return vk::Format::eD32Sfloat;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
|
144
src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
Normal file
144
src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
Normal file
|
@ -0,0 +1,144 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
|
||||
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
|
||||
vk::ShaderModule module)
|
||||
: instance{instance_}, scheduler{scheduler_}, info{*info_} {
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
.module = module,
|
||||
.pName = "main",
|
||||
};
|
||||
|
||||
u32 binding{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
for (const auto& buffer : info.buffers) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
|
||||
: vk::DescriptorType::eUniformBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
}
|
||||
for (const auto& image : info.images) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
}
|
||||
for (const auto& sampler : info.samplers) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eSampler,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
}
|
||||
|
||||
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
|
||||
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
};
|
||||
desc_layout = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
|
||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 1U,
|
||||
.pSetLayouts = &set_layout,
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
};
|
||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
|
||||
const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
|
||||
.stage = shader_ci,
|
||||
.layout = *pipeline_layout,
|
||||
};
|
||||
auto result =
|
||||
instance.GetDevice().createComputePipelineUnique(pipeline_cache, compute_pipeline_ci);
|
||||
if (result.result == vk::Result::eSuccess) {
|
||||
pipeline = std::move(result.value);
|
||||
} else {
|
||||
UNREACHABLE_MSG("Graphics pipeline creation failed!");
|
||||
}
|
||||
}
|
||||
|
||||
ComputePipeline::~ComputePipeline() = default;
|
||||
|
||||
void ComputePipeline::BindResources(Core::MemoryManager* memory,
|
||||
VideoCore::TextureCache& texture_cache) const {
|
||||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
u32 binding{};
|
||||
|
||||
for (const auto& buffer : info.buffers) {
|
||||
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const u32 size = vsharp.GetSize();
|
||||
const VAddr addr = vsharp.base_address.Value();
|
||||
texture_cache.OnCpuWrite(addr);
|
||||
const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
|
||||
buffer_infos.emplace_back(vk_buffer, offset, size);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
|
||||
: vk::DescriptorType::eUniformBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
}
|
||||
|
||||
for (const auto& image : info.images) {
|
||||
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
||||
const auto& image_view = texture_cache.FindImageView(tsharp);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
for (const auto& sampler : info.samplers) {
|
||||
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampler,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
|
||||
if (!set_writes.empty()) {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
|
||||
set_writes);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
45
src/video_core/renderer_vulkan/vk_compute_pipeline.h
Normal file
45
src/video_core/renderer_vulkan/vk_compute_pipeline.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Core {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class TextureCache;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class StreamBuffer;
|
||||
|
||||
class ComputePipeline {
|
||||
public:
|
||||
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
|
||||
vk::PipelineCache pipeline_cache, const Shader::Info* info,
|
||||
vk::ShaderModule module);
|
||||
~ComputePipeline();
|
||||
|
||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
void BindResources(Core::MemoryManager* memory, VideoCore::TextureCache& texture_cache) const;
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
Shader::Info info{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -16,7 +16,8 @@
|
|||
namespace Vulkan {
|
||||
|
||||
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
|
||||
const PipelineKey& key_, vk::PipelineCache pipeline_cache,
|
||||
const GraphicsPipelineKey& key_,
|
||||
vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> infos,
|
||||
std::array<vk::ShaderModule, MaxShaderStages> modules)
|
||||
: instance{instance_}, scheduler{scheduler_}, key{key_} {
|
||||
|
@ -50,7 +51,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
});
|
||||
bindings.push_back({
|
||||
.binding = input.binding,
|
||||
.stride = u32(buffer.stride),
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = vk::VertexInputRate::eVertex,
|
||||
});
|
||||
}
|
||||
|
@ -275,8 +276,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
const auto& input = vs_info.vs_inputs[i];
|
||||
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
if (i == 0) {
|
||||
start_offset =
|
||||
map_staging(buffer.base_address.Value(), buffer.stride * buffer.num_records);
|
||||
start_offset = map_staging(buffer.base_address.Value(), buffer.GetSize());
|
||||
base_address = buffer.base_address;
|
||||
}
|
||||
buffers[i] = staging.Handle();
|
||||
|
@ -297,7 +297,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
for (const auto& stage : stages) {
|
||||
for (const auto& buffer : stage.buffers) {
|
||||
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const u32 size = vsharp.stride * vsharp.num_records;
|
||||
const u32 size = vsharp.GetSize();
|
||||
const u32 offset = map_staging(vsharp.base_address.Value(), size);
|
||||
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
||||
set_writes.push_back({
|
||||
|
|
|
@ -26,7 +26,7 @@ class StreamBuffer;
|
|||
|
||||
using Liverpool = AmdGpu::Liverpool;
|
||||
|
||||
struct PipelineKey {
|
||||
struct GraphicsPipelineKey {
|
||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
vk::Format depth_format;
|
||||
|
@ -40,16 +40,16 @@ struct PipelineKey {
|
|||
Liverpool::CullMode cull_mode;
|
||||
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
||||
|
||||
bool operator==(const PipelineKey& key) const noexcept {
|
||||
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
|
||||
bool operator==(const GraphicsPipelineKey& key) const noexcept {
|
||||
return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0;
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<PipelineKey>);
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
|
||||
|
||||
class GraphicsPipeline {
|
||||
public:
|
||||
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
|
||||
const PipelineKey& key, vk::PipelineCache pipeline_cache,
|
||||
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> infos,
|
||||
std::array<vk::ShaderModule, MaxShaderStages> modules);
|
||||
~GraphicsPipeline();
|
||||
|
@ -76,14 +76,14 @@ private:
|
|||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
std::array<Shader::Info, MaxShaderStages> stages{};
|
||||
PipelineKey key;
|
||||
GraphicsPipelineKey key;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
template <>
|
||||
struct std::hash<Vulkan::PipelineKey> {
|
||||
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
|
||||
struct std::hash<Vulkan::GraphicsPipelineKey> {
|
||||
std::size_t operator()(const Vulkan::GraphicsPipelineKey& key) const noexcept {
|
||||
return XXH3_64bits(&key, sizeof(key));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
|
|||
.timelineSemaphore = true,
|
||||
},
|
||||
vk::PhysicalDeviceVulkan13Features{
|
||||
.shaderDemoteToHelperInvocation = true,
|
||||
.dynamicRendering = true,
|
||||
.maintenance4 = true,
|
||||
},
|
||||
|
|
|
@ -21,7 +21,12 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
|
|||
info.user_data = user_data;
|
||||
info.stage = stage;
|
||||
switch (stage) {
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.ps_inputs.push_back({
|
||||
.param_index = regs.ps_inputs[i].input_offset.Value(),
|
||||
|
@ -32,6 +37,13 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
|
|||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -48,17 +60,30 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
};
|
||||
}
|
||||
|
||||
const GraphicsPipeline* PipelineCache::GetPipeline() {
|
||||
RefreshKey();
|
||||
const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
||||
RefreshGraphicsKey();
|
||||
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
|
||||
if (is_new) {
|
||||
it.value() = CreatePipeline();
|
||||
it.value() = CreateGraphicsPipeline();
|
||||
}
|
||||
const GraphicsPipeline* pipeline = it->second.get();
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void PipelineCache::RefreshKey() {
|
||||
const ComputePipeline* PipelineCache::GetComputePipeline() {
|
||||
const auto& cs_pgm = liverpool->regs.cs_program;
|
||||
ASSERT(cs_pgm.Address() != nullptr);
|
||||
const auto code = cs_pgm.Code();
|
||||
compute_key = XXH3_64bits(code.data(), code.size_bytes());
|
||||
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
|
||||
if (is_new) {
|
||||
it.value() = CreateComputePipeline();
|
||||
}
|
||||
const ComputePipeline* pipeline = it->second.get();
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void PipelineCache::RefreshGraphicsKey() {
|
||||
auto& regs = liverpool->regs;
|
||||
auto& key = graphics_key;
|
||||
|
||||
|
@ -92,7 +117,7 @@ void PipelineCache::RefreshKey() {
|
|||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||
const auto& regs = liverpool->regs;
|
||||
|
||||
u32 binding{};
|
||||
|
@ -141,6 +166,36 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
|||
infos, stages);
|
||||
}
|
||||
|
||||
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
||||
const auto& cs_pgm = liverpool->regs.cs_program;
|
||||
const auto code = cs_pgm.Code();
|
||||
|
||||
// Dump shader code if requested.
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(code, compute_key, Shader::Stage::Compute, "bin");
|
||||
}
|
||||
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
|
||||
// Recompile shader to IR.
|
||||
const Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
u32 binding{};
|
||||
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
|
||||
const auto module = CompileSPV(spv_code, instance.GetDevice());
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
|
||||
}
|
||||
|
||||
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, &program.info,
|
||||
module);
|
||||
}
|
||||
|
||||
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||
std::string_view ext) {
|
||||
using namespace Common::FS;
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
||||
namespace Shader {
|
||||
|
@ -26,15 +27,17 @@ public:
|
|||
AmdGpu::Liverpool* liverpool);
|
||||
~PipelineCache() = default;
|
||||
|
||||
const GraphicsPipeline* GetPipeline();
|
||||
const GraphicsPipeline* GetGraphicsPipeline();
|
||||
|
||||
const ComputePipeline* GetComputePipeline();
|
||||
|
||||
private:
|
||||
void RefreshKey();
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreatePipeline();
|
||||
|
||||
void RefreshGraphicsKey();
|
||||
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, std::string_view ext);
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
|
@ -43,9 +46,11 @@ private:
|
|||
vk::UniquePipelineLayout pipeline_layout;
|
||||
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
|
||||
std::array<vk::ShaderModule, MaxShaderStages> stages{};
|
||||
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
|
||||
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||
Shader::Profile profile{};
|
||||
PipelineKey graphics_key{};
|
||||
GraphicsPipelineKey graphics_key{};
|
||||
u64 compute_key{};
|
||||
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||
Shader::ObjectPool<Shader::IR::Block> block_pool;
|
||||
};
|
||||
|
|
|
@ -36,7 +36,7 @@ void Rasterizer::Draw(bool is_indexed) {
|
|||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const auto& regs = liverpool->regs;
|
||||
const u32 num_indices = SetupIndexBuffer(is_indexed);
|
||||
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
|
||||
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
|
||||
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||
|
||||
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
|
||||
|
@ -49,8 +49,13 @@ void Rasterizer::Draw(bool is_indexed) {
|
|||
};
|
||||
|
||||
// TODO: Don't restart renderpass every draw
|
||||
const auto& scissor = regs.screen_scissor;
|
||||
const vk::RenderingInfo rendering_info = {
|
||||
.renderArea = {.offset = {0, 0}, .extent = {1920, 1080}},
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {scissor.top_left_x, scissor.top_left_y},
|
||||
.extent = {scissor.GetWidth(), scissor.GetHeight()},
|
||||
},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = 1,
|
||||
.pColorAttachments = &color_info,
|
||||
|
@ -69,6 +74,17 @@ void Rasterizer::Draw(bool is_indexed) {
|
|||
cmdbuf.endRendering();
|
||||
}
|
||||
|
||||
void Rasterizer::DispatchDirect() {
|
||||
return;
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const auto& cs_program = liverpool->regs.cs_program;
|
||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||
pipeline->BindResources(memory, texture_cache);
|
||||
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
||||
}
|
||||
|
||||
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
|
||||
// Emulate QuadList primitive type with CPU made index buffer.
|
||||
const auto& regs = liverpool->regs;
|
||||
|
|
|
@ -31,6 +31,8 @@ public:
|
|||
|
||||
void Draw(bool is_indexed);
|
||||
|
||||
void DispatchDirect();
|
||||
|
||||
private:
|
||||
u32 SetupIndexBuffer(bool& is_indexed);
|
||||
void MapMemory(VAddr addr, size_t size);
|
||||
|
|
|
@ -55,7 +55,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
|
|||
.pQueueFamilyIndices = queue_family_indices.data(),
|
||||
.preTransform = transform,
|
||||
.compositeAlpha = composite_alpha,
|
||||
.presentMode = vk::PresentModeKHR::eMailbox,
|
||||
.presentMode = vk::PresentModeKHR::eFifo,
|
||||
.clipped = true,
|
||||
.oldSwapchain = nullptr,
|
||||
};
|
||||
|
|
|
@ -98,7 +98,7 @@ struct Image {
|
|||
if (it == image_view_infos.end()) {
|
||||
return {};
|
||||
}
|
||||
return image_view_ids[std::distance(it, image_view_infos.begin())];
|
||||
return image_view_ids[std::distance(image_view_infos.begin(), it)];
|
||||
}
|
||||
|
||||
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask);
|
||||
|
|
|
@ -100,8 +100,7 @@ TextureCache::~TextureCache() {
|
|||
}
|
||||
|
||||
void TextureCache::OnCpuWrite(VAddr address) {
|
||||
const VAddr address_aligned = address & ~((1 << PageShift) - 1);
|
||||
ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) {
|
||||
ForEachImageInRegion(address, 1 << PageShift, [&](ImageId image_id, Image& image) {
|
||||
// Ensure image is reuploaded when accessed again.
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
// Untrack image, so the range is unprotected and the guest can write freely.
|
||||
|
@ -270,6 +269,7 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) {
|
|||
}
|
||||
|
||||
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
||||
std::scoped_lock lk{mutex};
|
||||
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
|
||||
const u64 page_start = addr >> PageShift;
|
||||
const u64 page_end = page_start + num_pages;
|
||||
|
@ -288,7 +288,7 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
|||
const u32 interval_size = interval_end_addr - interval_start_addr;
|
||||
void* addr = reinterpret_cast<void*>(interval_start_addr);
|
||||
if (delta > 0 && count == delta) {
|
||||
mprotect(addr, interval_size, PAGE_NOACCESS);
|
||||
mprotect(addr, interval_size, PAGE_READONLY);
|
||||
} else if (delta < 0 && count == -delta) {
|
||||
mprotect(addr, interval_size, PAGE_READWRITE);
|
||||
} else {
|
||||
|
|
|
@ -132,6 +132,7 @@ private:
|
|||
tsl::robin_map<u64, Sampler> samplers;
|
||||
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
|
||||
boost::icl::interval_map<VAddr, s32> cached_pages;
|
||||
std::mutex mutex;
|
||||
#ifdef _WIN64
|
||||
void* veh_handle{};
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue