video_core: Implement guest buffer manager (#373)

* video_core: Introduce buffer cache

* video_core: Use multi level page table for caches

* renderer_vulkan: Remove unused stream buffer

* fix build

* oops forgot optimize off
TheTurtle authored on 2024-08-08 15:02:10 +03:00, committed by GitHub
parent 159be2c7f4
commit 381ba8c7a5
55 changed files with 2697 additions and 1039 deletions
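
The "multi level page table" bullet is the structural change both caches now share: instead of hashing every guest address, lookups walk a sparse page table from virtual address to cache entry. A minimal two-level sketch of the idea follows; the type name, bit split, and 40-bit guest address space are illustrative assumptions, not the exact table this commit adds.

// Illustrative two-level page table mapping guest pages to cache IDs.
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>

using VAddr = std::uintptr_t;
using BufferId = std::uint32_t; // 0 = no entry, by convention of this sketch

class TwoLevelPageTable {
    static constexpr std::size_t PageBits = 12; // 4 KiB tracking granularity
    static constexpr std::size_t L2Bits = 14;   // entries per leaf table
    static constexpr std::size_t L2Size = std::size_t{1} << L2Bits;
    static constexpr std::size_t L1Size = std::size_t{1} << (40 - PageBits - L2Bits);

public:
    // Returns the slot for the page containing addr, allocating the leaf
    // table on first touch.
    BufferId& operator[](VAddr addr) {
        const std::size_t page = addr >> PageBits;
        auto& leaf = level1[page >> L2Bits];
        if (!leaf) {
            leaf = std::make_unique<std::array<BufferId, L2Size>>();
        }
        return (*leaf)[page & (L2Size - 1)];
    }

private:
    // 2^14 pointers; untouched address regions cost one null pointer each.
    std::array<std::unique_ptr<std::array<BufferId, L2Size>>, L1Size> level1{};
};

Compared to a flat page array, only the leaf tables for regions the guest actually touches are ever allocated, which is what makes tracking a large guest address space affordable.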


@@ -67,8 +67,8 @@ RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool*
: window{window_}, liverpool{liverpool_},
instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, draw_scheduler{instance},
present_scheduler{instance}, flip_scheduler{instance}, swapchain{instance, window},
texture_cache{instance, draw_scheduler} {
rasterizer = std::make_unique<Rasterizer>(instance, draw_scheduler, texture_cache, liverpool);
rasterizer{std::make_unique<Rasterizer>(instance, draw_scheduler, liverpool)},
texture_cache{rasterizer->GetTextureCache()} {
const u32 num_images = swapchain.GetImageCount();
const vk::Device device = instance.GetDevice();


@@ -47,7 +47,7 @@ public:
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address, bool is_eop) {
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info, cpu_address);
const auto image_id = texture_cache.FindImage(info, false);
auto& image = texture_cache.GetImage(image_id);
return PrepareFrameInternal(image, is_eop);
}
@@ -61,7 +61,7 @@ public:
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address);
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info, cpu_address);
const auto image_id = texture_cache.FindImage(info, false);
return texture_cache.GetImage(image_id);
}
@@ -88,7 +88,7 @@ private:
Scheduler flip_scheduler;
Swapchain swapchain;
std::unique_ptr<Rasterizer> rasterizer;
VideoCore::TextureCache texture_cache;
VideoCore::TextureCache& texture_cache;
vk::UniqueCommandPool command_pool;
std::vector<Frame> present_frames;
std::queue<Frame*> free_queue;


@@ -3,11 +3,10 @@
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {
@@ -51,6 +50,12 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
});
}
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = sizeof(Shader::PushData),
};
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
.bindingCount = static_cast<u32>(bindings.size()),
@@ -62,8 +67,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
.pushConstantRangeCount = 1U,
.pPushConstantRanges = &push_constants,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
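
The layout now reserves a push-constant range of sizeof(Shader::PushData) for the compute stage, and the graphics pipeline below does the same for vertex and fragment. The struct itself lives in the shader recompiler and is not shown in this diff; judging only by how it is used here (AddOffset(i, adjust), with adjust asserted to be a multiple of 4), a plausible reconstruction looks like this. Treat it as hypothetical.

// Hypothetical shape of Shader::PushData, inferred from its uses in this
// commit; the real definition may differ.
struct PushData {
    u32 step0; // vgt_instance_step_rate_0, filled by the graphics path
    u32 step1; // vgt_instance_step_rate_1
    std::array<u8, 32> buf_offsets{}; // per-binding offset, stored in 4-byte words

    void AddOffset(u32 binding, u32 byte_offset) {
        buf_offsets[binding] = static_cast<u8>(byte_offset >> 2);
    }
};
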
@@ -82,35 +87,18 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ComputePipeline::~ComputePipeline() = default;
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
Shader::PushData push_data{};
u32 binding{};
for (const auto& buffer : info.buffers) {
for (u32 i = 0; const auto& buffer : info.buffers) {
const auto vsharp = buffer.GetVsharp(info);
const u32 size = vsharp.GetSize();
const VAddr address = vsharp.base_address;
if (buffer.is_storage) {
texture_cache.OnCpuWrite(address);
}
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment());
buffer_infos.emplace_back(staging.Handle(), offset, size);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
// Most of the time when metadata is updated by a shader it gets cleared. It means we
// can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will
@@ -125,6 +113,31 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
const u32 size = vsharp.GetSize();
if (buffer.is_written) {
texture_cache.InvalidateMemory(address, size);
}
const u32 alignment =
buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, buffer.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(i, adjust);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
i++;
}
for (const auto& image_desc : info.images) {
@@ -168,6 +181,8 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
&push_data);
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes);
return true;
}
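
The AlignDown/adjust dance above exists because BufferCache::ObtainBuffer returns a byte offset into a cached buffer, while Vulkan requires descriptor offsets to honor minUniformBufferOffsetAlignment and minStorageBufferOffsetAlignment. Worked through with concrete (illustrative) numbers:

// Suppose ObtainBuffer returned offset 36 and the device requires 16-byte
// uniform offsets:
const u32 alignment = 16;                                        // UniformMinAlignment()
const u32 offset = 36;                                           // from ObtainBuffer
const u32 offset_aligned = Common::AlignDown(offset, alignment); // 32
const u32 adjust = offset - offset_aligned;                      // 4
// The descriptor is bound at offset 32 and must cover size + 4 bytes; the
// shader receives adjust via the push constants and skips those bytes itself,
// which is why adjust must be a whole number of 32-bit words.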


@@ -6,19 +6,15 @@
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace VideoCore {
class BufferCache;
class TextureCache;
}
} // namespace VideoCore
namespace Vulkan {
class Instance;
class Scheduler;
class StreamBuffer;
class ComputePipeline {
public:
@@ -31,7 +27,7 @@ public:
return *pipeline;
}
bool BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
bool BindResources(VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const;
private:


@@ -5,13 +5,13 @@
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {
@@ -32,9 +32,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
BuildDescSetLayout();
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
.offset = 0,
.size = 2 * sizeof(u32),
.size = sizeof(Shader::PushData),
};
const vk::DescriptorSetLayout set_layout = *desc_layout;
@@ -328,25 +328,43 @@ void GraphicsPipeline::BuildDescSetLayout() {
desc_layout = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
}
void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const {
BindVertexBuffers(staging);
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
Shader::PushData push_data{};
u32 binding{};
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
if (stage.uses_step_rates) {
push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step1 = regs.vgt_instance_step_rate_1;
}
for (u32 i = 0; const auto& buffer : stage.buffers) {
const auto vsharp = buffer.GetVsharp(stage);
const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment());
buffer_infos.emplace_back(staging.Handle(), offset, size);
if (vsharp) {
const VAddr address = vsharp.base_address;
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
}
const u32 size = vsharp.GetSize();
const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, buffer.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(i, adjust);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
} else {
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
@@ -356,10 +374,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
}
i++;
}
boost::container::static_vector<AmdGpu::Image, 16> tsharps;
@@ -406,86 +421,15 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
}
}
const auto cmdbuf = scheduler.CommandBuffer();
if (!set_writes.empty()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
set_writes);
}
}
void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
const auto& vs_info = stages[u32(Shader::Stage::Vertex)];
if (vs_info.vs_inputs.empty()) {
return;
}
std::array<vk::Buffer, MaxVertexBufferCount> host_buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> host_offsets;
boost::container::static_vector<AmdGpu::Buffer, MaxVertexBufferCount> guest_buffers;
struct BufferRange {
VAddr base_address;
VAddr end_address;
u64 offset; // offset in the mapped memory
size_t GetSize() const {
return end_address - base_address;
}
};
// Calculate buffers memory overlaps
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue;
}
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) {
continue;
}
guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
return lhv.base_address < rhv.base_address;
});
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]};
for (auto range : ranges) {
auto& prev_range = ranges_merged.back();
if (prev_range.end_address < range.base_address) {
ranges_merged.emplace_back(range);
} else {
prev_range.end_address = std::max(prev_range.end_address, range.end_address);
}
}
// Map buffers
for (auto& range : ranges_merged) {
range.offset = staging.Copy(range.base_address, range.GetSize(), 4);
}
// Bind vertex buffers
const size_t num_buffers = guest_buffers.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& buffer = guest_buffers[i];
const auto& host_buffer = std::ranges::find_if(
ranges_merged.cbegin(), ranges_merged.cend(), [&](const BufferRange& range) {
return (buffer.base_address >= range.base_address &&
buffer.base_address < range.end_address);
});
assert(host_buffer != ranges_merged.cend());
host_buffers[i] = staging.Handle();
host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
}
if (num_buffers > 0) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
}
cmdbuf.pushConstants(*pipeline_layout,
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U,
sizeof(push_data), &push_data);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
}
} // namespace Vulkan
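
The deleted BindVertexBuffers documents the old strategy: gather guest vertex buffers, sort their address ranges, merge overlaps, stage each merged range once, then point every binding into the merged copy. A concrete trace of the merge pass, with hypothetical addresses:

// Guest vertex buffers: [0x1000, 0x1800), [0x1400, 0x2000), [0x3000, 0x3100).
// After sorting and merging: [0x1000, 0x2000) and [0x3000, 0x3100).
// Two staging copies therefore serve three bindings, and each binding offset
// is recovered as range.offset + buffer.base_address - range.base_address.

All of this now lives behind BufferCache::BindVertexBuffers, which the rasterizer calls with the vertex stage info.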


@@ -7,13 +7,10 @@
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace VideoCore {
class BufferCache;
class TextureCache;
}
} // namespace VideoCore
namespace Vulkan {
@@ -22,7 +19,6 @@ static constexpr u32 MaxShaderStages = 5;
class Instance;
class Scheduler;
class StreamBuffer;
using Liverpool = AmdGpu::Liverpool;
@@ -64,7 +60,7 @@ public:
std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const;
vk::Pipeline Handle() const noexcept {
@@ -75,6 +71,10 @@ public:
return *pipeline_layout;
}
const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
return stages[u32(stage)];
}
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
@@ -90,7 +90,6 @@ public:
private:
void BuildDescSetLayout();
void BindVertexBuffers(StreamBuffer& staging) const;
private:
const Instance& instance;


@@ -204,7 +204,8 @@ bool Instance::CreateDevice() {
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
@@ -303,12 +304,19 @@ bool Instance::CreateDevice() {
.workgroupMemoryExplicitLayoutScalarBlockLayout = true,
.workgroupMemoryExplicitLayout8BitAccess = true,
.workgroupMemoryExplicitLayout16BitAccess = true,
}};
},
vk::PhysicalDeviceRobustness2FeaturesEXT{
.nullDescriptor = true,
},
};
if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
}
if (!robustness) {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
try {
device = physical_device.createDeviceUnique(device_chain.get());
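
The nullDescriptor feature of VK_EXT_robustness2, enabled just above, is what legalizes the graphics-pipeline fallback earlier in this commit, where an empty V# is bound as (VK_NULL_HANDLE, 0, VK_WHOLE_SIZE): reads through a null descriptor return zeros and writes are discarded. When the extension is missing the feature struct is unlinked from the chain, so such a driver would need a different fallback; one hypothetical option, not something this commit implements, is a persistent zero-filled dummy buffer:

// Sketch: fall back to a small dummy buffer when nullDescriptor is absent.
vk::DescriptorBufferInfo MakeBufferInfo(vk::Buffer buffer, vk::DeviceSize offset,
                                        vk::DeviceSize size, vk::Buffer dummy_buffer,
                                        bool has_null_descriptor) {
    if (buffer) {
        return {buffer, offset, size};
    }
    if (has_null_descriptor) {
        return {VK_NULL_HANDLE, 0, VK_WHOLE_SIZE};
    }
    return {dummy_buffer, 0, VK_WHOLE_SIZE};
}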


@@ -5,7 +5,6 @@
#include <tsl/robin_map.h>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -51,8 +50,8 @@ private:
Shader::Profile profile{};
GraphicsPipelineKey graphics_key{};
u64 compute_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool;
Common::ObjectPool<Shader::IR::Inst> inst_pool;
Common::ObjectPool<Shader::IR::Block> block_pool;
};
} // namespace Vulkan


@@ -13,22 +13,17 @@
namespace Vulkan {
static constexpr vk::BufferUsageFlags VertexIndexFlags =
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformBuffer |
vk::BufferUsageFlagBits::eStorageBuffer;
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
buffer_cache{instance, scheduler, liverpool_, page_manager},
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
memory->SetInstance(&instance);
memory->SetRasterizer(this);
wfi_event = instance.GetDevice().createEventUnique({});
}
Rasterizer::~Rasterizer() = default;
@@ -38,29 +33,24 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
const auto cmdbuf = scheduler.CommandBuffer();
const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset);
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
if (!pipeline) {
return;
}
try {
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
pipeline->BindResources(regs, buffer_cache, texture_cache);
} catch (...) {
UNREACHABLE();
}
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
buffer_cache.BindVertexBuffers(vs_info);
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
BeginRendering();
UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
const u32 step_rates[] = {
regs.vgt_instance_step_rate_0,
regs.vgt_instance_step_rate_1,
};
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
sizeof(step_rates), &step_rates);
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
@@ -82,8 +72,7 @@ void Rasterizer::DispatchDirect() {
}
try {
const auto has_resources =
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache);
if (!has_resources) {
return;
}
@@ -131,7 +120,7 @@ void Rasterizer::BeginRendering() {
state.color_images[state.num_color_attachments] = image.image;
state.color_attachments[state.num_color_attachments++] = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
@@ -168,45 +157,19 @@ void Rasterizer::BeginRendering() {
scheduler.BeginRendering(state);
}
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
// Emulate QuadList primitive type with a CPU-generated index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
// ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
void Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
buffer_cache.InvalidateMemory(addr, size);
texture_cache.InvalidateMemory(addr, size);
}
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
void Rasterizer::MapMemory(VAddr addr, u64 size) {
page_manager.OnGpuMap(addr, size);
}
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return regs.num_indices;
}
// Figure out index type and size.
const bool is_index16 = regs.index_buffer_type.index_type == Liverpool::IndexType::Index16;
const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32;
const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32);
// Upload index data to stream buffer.
const auto index_address = regs.index_base_address.Address<const void*>();
const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
std::memcpy(data, index_address, index_buffer_size);
vertex_index_buffer.Commit(index_buffer_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset + index_offset * index_size,
index_type);
return regs.num_indices;
void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
buffer_cache.InvalidateMemory(addr, size);
texture_cache.UnmapMemory(addr, size);
page_manager.OnGpuUnmap(addr, size);
}
void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) {
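
InvalidateMemory, MapMemory, and UnmapMemory are the hooks PageManager drives when the guest maps, writes, or unmaps memory, fanning each event out to both caches. The diff does not show how PageManager detects CPU writes, but the standard emulator technique is page protection; a generic POSIX sketch of that technique, illustrative only:

#include <csignal>
#include <cstddef>
#include <cstdint>
#include <sys/mman.h>

constexpr std::uintptr_t PageSize = 4096;

// Write-protect a tracked, page-aligned range so the first CPU store faults.
void TrackRange(void* base, std::size_t size) {
    mprotect(base, size, PROT_READ);
}

// SIGSEGV handler: mark the page dirty, then restore write access so the
// faulting store retries and succeeds. A real handler must check that the
// address is actually tracked, forward unrelated faults to the previous
// handler, and keep the work async-signal-safe.
void FaultHandler(int, siginfo_t* info, void*) {
    const auto addr = reinterpret_cast<std::uintptr_t>(info->si_addr);
    void* page = reinterpret_cast<void*>(addr & ~(PageSize - 1));
    // Here a dirty flag for the page would be raised, feeding into
    // Rasterizer::InvalidateMemory on the next cache query.
    mprotect(page, PageSize, PROT_READ | PROT_WRITE);
}

void InstallHandler() {
    struct sigaction sa{};
    sa.sa_sigaction = FaultHandler;
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGSEGV, &sa, nullptr);
}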


@@ -3,8 +3,10 @@
#pragma once
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/texture_cache.h"
namespace AmdGpu {
struct Liverpool;
@@ -14,10 +16,6 @@ namespace Core {
class MemoryManager;
}
namespace VideoCore {
class TextureCache;
}
namespace Vulkan {
class Scheduler;
@@ -26,9 +24,13 @@ class GraphicsPipeline;
class Rasterizer {
public:
explicit Rasterizer(const Instance& instance, Scheduler& scheduler,
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
AmdGpu::Liverpool* liverpool);
~Rasterizer();
[[nodiscard]] VideoCore::TextureCache& GetTextureCache() noexcept {
return texture_cache;
}
void Draw(bool is_indexed, u32 index_offset = 0);
void DispatchDirect();
@@ -36,12 +38,13 @@ public:
void ScopeMarkerBegin(const std::string& str);
void ScopeMarkerEnd();
void InvalidateMemory(VAddr addr, u64 size);
void MapMemory(VAddr addr, u64 size);
void UnmapMemory(VAddr addr, u64 size);
u64 Flush();
private:
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
void MapMemory(VAddr addr, size_t size);
void BeginRendering();
void UpdateDynamicState(const GraphicsPipeline& pipeline);
@@ -51,11 +54,13 @@ private:
private:
const Instance& instance;
Scheduler& scheduler;
VideoCore::TextureCache& texture_cache;
VideoCore::PageManager page_manager;
VideoCore::BufferCache buffer_cache;
VideoCore::TextureCache texture_cache;
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer;
vk::UniqueEvent wfi_event;
};
} // namespace Vulkan
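
Note the member order above: page_manager is declared before buffer_cache, which is declared before texture_cache. C++ initializes members in declaration order, and the constructor in vk_rasterizer.cpp depends on that, since each later member receives references to the earlier ones. A reduced illustration with stub types:

struct PageManager {};
struct BufferCache {
    explicit BufferCache(PageManager& pm) : page_manager{pm} {}
    PageManager& page_manager;
};
struct TextureCache {
    explicit TextureCache(BufferCache& bc) : buffer_cache{bc} {}
    BufferCache& buffer_cache;
};

// Members construct top to bottom and destroy bottom to top, so a member may
// safely hold a reference to anything declared above it.
struct Owner {
    PageManager page_manager;
    BufferCache buffer_cache{page_manager};
    TextureCache texture_cache{buffer_cache};
};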


@@ -6,6 +6,7 @@
#include <condition_variable>
#include <boost/container/static_vector.hpp>
#include "common/types.h"
#include "common/unique_function.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
@@ -97,8 +98,8 @@ public:
}
/// Defers an operation until the gpu has reached the current cpu tick.
void DeferOperation(auto&& func) {
pending_ops.emplace(func, CurrentTick());
void DeferOperation(Common::UniqueFunction<void>&& func) {
pending_ops.emplace(std::move(func), CurrentTick());
}
static std::mutex submit_mutex;
@@ -115,7 +116,7 @@ private:
vk::CommandBuffer current_cmdbuf;
std::condition_variable_any event_cv;
struct PendingOp {
std::function<void()> callback;
Common::UniqueFunction<void> callback;
u64 gpu_tick;
};
std::queue<PendingOp> pending_ops;
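
Switching DeferOperation from std::function to Common::UniqueFunction matters because std::function requires a copy-constructible callable, so a deferred operation could never capture move-only state such as a Vulkan handle pending destruction. A minimal move-only wrapper in the same spirit, illustrative rather than the actual Common::UniqueFunction:

#include <memory>
#include <utility>

// Type-erased, move-only callable: unlike std::function, it accepts lambdas
// that capture move-only objects.
template <typename Ret, typename... Args>
class MoveOnlyFunction {
    struct Base {
        virtual ~Base() = default;
        virtual Ret Call(Args... args) = 0;
    };
    template <typename F>
    struct Impl final : Base {
        explicit Impl(F f) : func{std::move(f)} {}
        Ret Call(Args... args) override {
            return func(std::forward<Args>(args)...);
        }
        F func;
    };

public:
    MoveOnlyFunction() = default;
    template <typename F>
    MoveOnlyFunction(F f) : callable{std::make_unique<Impl<F>>(std::move(f))} {}

    Ret operator()(Args... args) {
        return callable->Call(std::forward<Args>(args)...);
    }

private:
    std::unique_ptr<Base> callable;
};

With this shape, pending_ops.emplace(std::move(func), CurrentTick()) works even when func owns resources that must be released exactly once.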


@@ -1,241 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
namespace {
std::string_view BufferTypeName(BufferType type) {
switch (type) {
case BufferType::Upload:
return "Upload";
case BufferType::Download:
return "Download";
case BufferType::Stream:
return "Stream";
default:
return "Invalid";
}
}
vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) {
switch (type) {
case BufferType::Upload:
return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
case BufferType::Download:
return vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached;
case BufferType::Stream:
return vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent;
default:
UNREACHABLE_MSG("Unknown buffer type {}", static_cast<u32>(type));
return vk::MemoryPropertyFlagBits::eHostVisible;
}
}
static std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
vk::MemoryPropertyFlags wanted) {
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
const auto flags = properties.memoryTypes[i].propertyFlags;
if ((flags & wanted) == wanted) {
return i;
}
}
return std::nullopt;
}
/// Get the preferred host visible memory type.
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
vk::MemoryPropertyFlags flags = MakePropertyFlags(type);
std::optional preferred_type = FindMemoryType(properties, flags);
constexpr std::array remove_flags = {
vk::MemoryPropertyFlagBits::eHostCached,
vk::MemoryPropertyFlagBits::eHostCoherent,
};
for (u32 i = 0; i < remove_flags.size() && !preferred_type; i++) {
flags &= ~remove_flags[i];
preferred_type = FindMemoryType(properties, flags);
}
ASSERT_MSG(preferred_type, "No suitable memory type found");
return preferred_type.value();
}
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
} // Anonymous namespace
StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
vk::BufferUsageFlags usage_, u64 size, BufferType type_)
: instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()},
stream_buffer_size{size}, usage{usage_}, type{type_} {
CreateBuffers(size);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
StreamBuffer::~StreamBuffer() {
device.unmapMemory(memory);
device.destroyBuffer(buffer);
device.freeMemory(memory);
}
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
if (!is_coherent && type == BufferType::Stream) {
size = Common::AlignUp(size, instance.NonCoherentAtomSize());
}
ASSERT(size <= stream_buffer_size);
mapped_size = size;
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
bool invalidate{false};
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidate = true;
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
}
const u64 mapped_upper_bound = offset + size;
WaitPendingOperations(mapped_upper_bound);
return std::make_tuple(mapped + offset, offset, invalidate);
}
void StreamBuffer::Commit(u64 size) {
if (!is_coherent && type == BufferType::Stream) {
size = Common::AlignUp(size, instance.NonCoherentAtomSize());
}
ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
size);
const vk::MappedMemoryRange range = {
.memory = memory,
.offset = offset,
.size = size,
};
if (!is_coherent && type == BufferType::Download) {
device.invalidateMappedMemoryRanges(range);
} else if (!is_coherent) {
device.flushMappedMemoryRanges(range);
}
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
}
void StreamBuffer::CreateBuffers(u64 prefered_size) {
const vk::Device device = instance.GetDevice();
const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties, type);
const vk::MemoryType mem_type = memory_properties.memoryTypes[preferred_type];
const u32 preferred_heap = mem_type.heapIndex;
is_coherent =
static_cast<bool>(mem_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent);
// Subtract some bytes from the preferred heap size to avoid running out of memory.
const vk::DeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
// As per DXVK's example, using `heap_size / 2`
const vk::DeviceSize allocable_size = heap_size / 2;
buffer = device.createBuffer({
.size = std::min(prefered_size, allocable_size),
.usage = usage,
});
const auto requirements_chain =
device
.getBufferMemoryRequirements2<vk::MemoryRequirements2, vk::MemoryDedicatedRequirements>(
{.buffer = buffer});
const auto& requirements = requirements_chain.get<vk::MemoryRequirements2>();
const auto& dedicated_requirements = requirements_chain.get<vk::MemoryDedicatedRequirements>();
stream_buffer_size = static_cast<u64>(requirements.memoryRequirements.size);
LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KiB with flags {}",
BufferTypeName(type), stream_buffer_size / 1024,
vk::to_string(mem_type.propertyFlags));
if (dedicated_requirements.prefersDedicatedAllocation) {
vk::StructureChain<vk::MemoryAllocateInfo, vk::MemoryDedicatedAllocateInfo> alloc_chain =
{};
auto& alloc_info = alloc_chain.get<vk::MemoryAllocateInfo>();
alloc_info.allocationSize = requirements.memoryRequirements.size;
alloc_info.memoryTypeIndex = preferred_type;
auto& dedicated_alloc_info = alloc_chain.get<vk::MemoryDedicatedAllocateInfo>();
dedicated_alloc_info.buffer = buffer;
memory = device.allocateMemory(alloc_chain.get());
} else {
memory = device.allocateMemory({
.allocationSize = requirements.memoryRequirements.size,
.memoryTypeIndex = preferred_type,
});
}
device.bindBufferMemory(buffer, memory, 0);
mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
if (instance.HasDebuggingToolAttached()) {
SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
SetObjectName(device, memory, "StreamBufferMemory({}): {} KiB {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
}
}
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
++wait_cursor;
}
}
u64 StreamBuffer::Copy(VAddr src, size_t size, size_t alignment /*= 0*/) {
const auto [data, offset, _] = Map(size, alignment);
std::memcpy(data, reinterpret_cast<const void*>(src), size);
Commit(size);
return offset;
}
} // namespace Vulkan
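
That deletion removes the last piece of the per-draw staging model. The change in call pattern, distilled from the pipeline hunks earlier in this commit:

// Before: every draw memcpys guest memory into a ring buffer.
const u32 offset = staging.Copy(address, size, alignment);
buffer_infos.emplace_back(staging.Handle(), offset, size);

// After: the buffer cache returns a persistent GPU buffer mirroring the
// guest range; data moves only when the page manager reports CPU writes.
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainBuffer(address, size, is_written);
buffer_infos.emplace_back(vk_buffer->Handle(), buf_offset, size);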


@@ -1,89 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <optional>
#include <span>
#include <tuple>
#include <vector>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
enum class BufferType : u32 {
Upload = 0,
Download = 1,
Stream = 2,
};
class Instance;
class Scheduler;
class StreamBuffer final {
static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
public:
explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
vk::BufferUsageFlags usage, u64 size,
BufferType type = BufferType::Stream);
~StreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @param alignment Required alignment of the returned offset.
* @returns A tuple of a raw memory pointer (with offset added), the buffer offset, and whether the buffer wrapped and was invalidated.
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);
/// Maps and commits a memory region with user provided data
u64 Copy(VAddr src, size_t size, size_t alignment = 0);
vk::Buffer Handle() const noexcept {
return buffer;
}
private:
struct Watch {
u64 tick{};
u64 upper_bound{};
};
/// Creates Vulkan buffer handles, committing the required memory.
void CreateBuffers(u64 prefered_size);
/// Increases the number of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
private:
const Instance& instance; ///< Vulkan instance.
Scheduler& scheduler; ///< Command scheduler.
vk::Device device;
vk::Buffer buffer; ///< Mapped buffer.
vk::DeviceMemory memory; ///< Memory allocation.
u8* mapped{}; ///< Pointer to the mapped memory
u64 stream_buffer_size{}; ///< Stream buffer size.
vk::BufferUsageFlags usage{};
BufferType type;
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
bool is_coherent{}; ///< True if the buffer is coherent
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan
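
For reference, the removed API was driven as in the old SetupIndexBuffer: Map reserves a region (reporting a wrap-around invalidation when the ring restarts), Commit publishes it, and the watches recorded at each Commit let the next wrap wait only for regions the GPU still owns.

// Usage pattern of the removed stream buffer, as seen in SetupIndexBuffer:
const auto [data, offset, invalidated] = vertex_index_buffer.Map(index_buffer_size);
std::memcpy(data, index_address, index_buffer_size);
vertex_index_buffer.Commit(index_buffer_size);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);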