video_core: Rewrite vulkan and videoout

This commit is contained in:
GPUCode 2024-04-14 17:09:51 +03:00
parent 0a94899c86
commit c01b6f8397
89 changed files with 5378 additions and 2150 deletions

View file

@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <climits>
#include <utility>
#include "common/types.h"
namespace VideoCore {
// Based on Table 8.13 Data and Image Formats in Sea Islands Series Instruction Set Architecture
enum class PixelFormat : u32 {
Invalid,
R32G32B32A32_Float,
B32G32R32A32_Float,
R32G32B32X32_Float,
B32G32R32X32_Float,
R32G32B32A32_Uint,
R32G32B32A32_Sint,
R32G32B32_Float,
R32G32B32_Uint,
R32G32B32_Sint,
R16G16B16A16_Float,
R16G16B16X16_Float,
B16G16R16X16_Float,
R16G16B16A16_Uint,
R16G16B16A16_Sint,
R16G16B16A16_Unorm,
B16G16R16A16_Unorm,
R16G16B16X16_Unorm,
B16G16R16X16_Unorm,
R16G16B16A16_Snorm,
L32A32_Float,
R32G32_Float,
R32G32_Uint,
R32G32_Sint,
R11G11B10_Float,
R8G8B8A8_Unorm,
R8G8B8X8_Unorm,
R8G8B8A8_UnormSrgb,
R8G8B8X8_UnormSrgb,
R8G8B8A8_Uint,
R8G8B8A8_Snorm,
R8G8B8A8_Sint,
L16A16_Float,
R16G16_Float,
L16A16_Unorm,
R16G16_Unorm,
R16G16_Uint,
R16G16_Snorm,
R16G16_Sint,
R32_Float,
L32_Float,
A32_Float,
R32_Uint,
R32_Sint,
R8G8_Unorm,
R8G8_Uint,
R8G8_Snorm,
R8G8_Sint,
L8A8_Unorm,
L8A8_UnormSrgb,
R16_Float,
L16_Float,
A16_Float,
R16_Unorm,
L16_Unorm,
A16_Unorm,
R16_Uint,
R16_Snorm,
R16_Sint,
R8_Unorm,
L8_Unorm,
L8_UnormSrgb,
R8_Uint,
R8_Snorm,
R8_Sint,
A8_Unorm,
};
constexpr bool IsDepthStencilFormat(PixelFormat format) {
return false;
}
} // namespace VideoCore

View file

@ -0,0 +1,354 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "sdl_window.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include <vk_mem_alloc.h>
namespace Vulkan {
bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format format) {
const vk::FormatProperties props{physical_device.getFormatProperties(format)};
return static_cast<bool>(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitDst);
}
[[nodiscard]] vk::ImageSubresourceLayers MakeImageSubresourceLayers() {
return vk::ImageSubresourceLayers{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
};
}
[[nodiscard]] vk::ImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
s32 swapchain_height) {
return vk::ImageBlit{
.srcSubresource = MakeImageSubresourceLayers(),
.srcOffsets =
std::array{
vk::Offset3D{
.x = 0,
.y = 0,
.z = 0,
},
vk::Offset3D{
.x = frame_width,
.y = frame_height,
.z = 1,
},
},
.dstSubresource = MakeImageSubresourceLayers(),
.dstOffsets =
std::array{
vk::Offset3D{
.x = 0,
.y = 0,
.z = 0,
},
vk::Offset3D{
.x = swapchain_width,
.y = swapchain_height,
.z = 1,
},
},
};
}
RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_)
: window{window_}, instance{window, 0}, scheduler{instance}, swapchain{instance, window},
texture_cache{instance, scheduler} {
const u32 num_images = swapchain.GetImageCount();
const vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer |
vk::CommandPoolCreateFlagBits::eTransient,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
};
command_pool = device.createCommandPoolUnique(pool_info);
const vk::CommandBufferAllocateInfo alloc_info = {
.commandPool = *command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = num_images,
};
const auto cmdbuffers = device.allocateCommandBuffers(alloc_info);
present_frames.resize(num_images);
for (u32 i = 0; i < num_images; i++) {
Frame& frame = present_frames[i];
frame.cmdbuf = cmdbuffers[i];
frame.render_ready = device.createSemaphore({});
frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
free_queue.push(&frame);
}
}
RendererVulkan::~RendererVulkan() {
scheduler.Finish();
const vk::Device device = instance.GetDevice();
for (auto& frame : present_frames) {
vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
device.destroyImageView(frame.image_view);
device.destroySemaphore(frame.render_ready);
device.destroyFence(frame.present_done);
}
}
void RendererVulkan::RecreateFrame(Frame* frame, u32 width, u32 height) {
const vk::Device device = instance.GetDevice();
if (frame->image_view) {
device.destroyImageView(frame->image_view);
}
if (frame->image) {
vmaDestroyImage(instance.GetAllocator(), frame->image, frame->allocation);
}
const vk::Format format = swapchain.GetSurfaceFormat().format;
const vk::ImageCreateInfo image_info = {
.imageType = vk::ImageType::e2D,
.format = format,
.extent = {width, height, 1},
.mipLevels = 1,
.arrayLayers = 1,
.samples = vk::SampleCountFlagBits::e1,
.usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc,
};
const VmaAllocationCreateInfo alloc_info = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
};
VkImage unsafe_image{};
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &frame->allocation, nullptr);
if (result != VK_SUCCESS) [[unlikely]] {
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}",
vk::to_string(vk::Result{result}));
UNREACHABLE();
}
frame->image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = frame->image,
.viewType = vk::ImageViewType::e2D,
.format = format,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
frame->image_view = device.createImageView(view_info);
frame->width = width;
frame->height = height;
}
Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address) {
// Request presentation image from the texture cache.
auto& image = texture_cache.FindDisplayBuffer(attribute, cpu_address);
// Request a free presentation frame.
Frame* frame = GetRenderFrame();
// Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
scheduler.Record([frame, vk_image = vk::Image(image.image),
size = image.info.size](vk::CommandBuffer cmdbuf) {
const vk::ImageMemoryBarrier pre_barrier{
.srcAccessMask = vk::AccessFlagBits::eTransferRead,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = frame->image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
cmdbuf.blitImage(vk_image, vk::ImageLayout::eGeneral, frame->image,
vk::ImageLayout::eGeneral,
MakeImageBlit(size.width, size.height, frame->width, frame->height),
vk::Filter::eLinear);
});
// Flush pending vulkan operations.
scheduler.Flush(frame->render_ready);
scheduler.WaitWorker();
return frame;
}
void RendererVulkan::Present(Frame* frame) {
swapchain.AcquireNextImage();
const vk::Image swapchain_image = swapchain.Image();
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
};
const vk::CommandBuffer cmdbuf = frame->cmdbuf;
cmdbuf.begin(begin_info);
const vk::Extent2D extent = swapchain.GetExtent();
const std::array pre_barriers{
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = frame->image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
const vk::ImageMemoryBarrier post_barrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::ePresentSrcKHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
{}, {}, pre_barriers);
cmdbuf.blitImage(frame->image, vk::ImageLayout::eGeneral, swapchain_image,
vk::ImageLayout::eTransferDstOptimal,
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
vk::Filter::eLinear);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
cmdbuf.end();
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eAllGraphics,
};
const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
const std::array wait_semaphores = {image_acquired, frame->render_ready};
vk::SubmitInfo submit_info = {
.waitSemaphoreCount = static_cast<u32>(wait_semaphores.size()),
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &present_ready,
};
try {
std::scoped_lock submit_lock{scheduler.submit_mutex};
instance.GetGraphicsQueue().submit(submit_info, frame->present_done);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what());
UNREACHABLE();
}
swapchain.Present();
// Free the frame for reuse
std::scoped_lock fl{free_mutex};
free_queue.push(frame);
free_cv.notify_one();
}
Frame* RendererVulkan::GetRenderFrame() {
// Wait for free presentation frames
Frame* frame;
{
std::unique_lock lock{free_mutex};
free_cv.wait(lock, [this] { return !free_queue.empty(); });
// Take the frame from the queue
frame = free_queue.front();
free_queue.pop();
}
const vk::Device device = instance.GetDevice();
vk::Result result{};
const auto wait = [&]() {
result = device.waitForFences(frame->present_done, false, std::numeric_limits<u64>::max());
return result;
};
// Wait for the presentation to be finished so all frame resources are free
while (wait() != vk::Result::eSuccess) {
// Retry if the waiting times out
if (result == vk::Result::eTimeout) {
continue;
}
}
// Reset fence for next queue submission.
device.resetFences(frame->present_done);
// If the window dimentions changed, recreate this frame
if (frame->width != window.getWidth() || frame->height != window.getHeight()) {
RecreateFrame(frame, window.getWidth(), window.getHeight());
}
return frame;
}
} // namespace Vulkan

View file

@ -0,0 +1,57 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Frontend {
class WindowSDL;
}
namespace Vulkan {
struct Frame {
u32 width;
u32 height;
VmaAllocation allocation;
vk::Image image;
vk::ImageView image_view;
vk::Semaphore render_ready;
vk::Fence present_done;
vk::CommandBuffer cmdbuf;
};
class RendererVulkan {
public:
explicit RendererVulkan(Frontend::WindowSDL& window);
~RendererVulkan();
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address);
void Present(Frame* frame);
void RecreateFrame(Frame* frame, u32 width, u32 height);
private:
Frame* GetRenderFrame();
private:
Frontend::WindowSDL& window;
Instance instance;
Scheduler scheduler;
Swapchain swapchain;
VideoCore::TextureCache texture_cache;
vk::UniqueCommandPool command_pool;
std::vector<Frame> present_frames;
std::queue<Frame*> free_queue;
std::mutex free_mutex;
std::condition_variable free_cv;
std::condition_variable_any frame_cv;
};
} // namespace Vulkan

View file

@ -0,0 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_common.h"
// Implement vma functions
#define VMA_IMPLEMENTATION
#include <vk_mem_alloc.h>
// Store the dispatch loader here
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE

View file

@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
// Include vulkan-hpp header
#define VK_ENABLE_BETA_EXTENSIONS
#define VK_NO_PROTOTYPES
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_NO_CONSTRUCTORS
#define VULKAN_HPP_NO_STRUCT_SETTERS
#include <vulkan/vulkan.hpp>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1

View file

@ -0,0 +1,108 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
DescriptorUpdateQueue::DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max_)
: device{instance.GetDevice()}, descriptor_write_max{descriptor_write_max_} {
descriptor_infos = std::make_unique<DescriptorInfoUnion[]>(descriptor_write_max);
descriptor_writes = std::make_unique<vk::WriteDescriptorSet[]>(descriptor_write_max);
}
void DescriptorUpdateQueue::Flush() {
if (descriptor_write_end == 0) {
return;
}
device.updateDescriptorSets({std::span(descriptor_writes.get(), descriptor_write_end)}, {});
descriptor_write_end = 0;
}
void DescriptorUpdateQueue::AddStorageImage(vk::DescriptorSet target, u8 binding,
vk::ImageView image_view,
vk::ImageLayout image_layout) {
if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
Flush();
}
auto& image_info = descriptor_infos[descriptor_write_end].image_info;
image_info.sampler = VK_NULL_HANDLE;
image_info.imageView = image_view;
image_info.imageLayout = image_layout;
descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{
.dstSet = target,
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageImage,
.pImageInfo = &image_info,
};
}
void DescriptorUpdateQueue::AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index,
vk::ImageView image_view, vk::Sampler sampler,
vk::ImageLayout image_layout) {
if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
Flush();
}
auto& image_info = descriptor_infos[descriptor_write_end].image_info;
image_info.sampler = sampler;
image_info.imageView = image_view;
image_info.imageLayout = image_layout;
descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{
.dstSet = target,
.dstBinding = binding,
.dstArrayElement = array_index,
.descriptorCount = 1,
.descriptorType =
sampler ? vk::DescriptorType::eCombinedImageSampler : vk::DescriptorType::eSampledImage,
.pImageInfo = &image_info,
};
}
void DescriptorUpdateQueue::AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer,
vk::DeviceSize offset, vk::DeviceSize size,
vk::DescriptorType type) {
if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
Flush();
}
auto& buffer_info = descriptor_infos[descriptor_write_end].buffer_info;
buffer_info.buffer = buffer;
buffer_info.offset = offset;
buffer_info.range = size;
descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{
.dstSet = target,
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = type,
.pBufferInfo = &buffer_info,
};
}
void DescriptorUpdateQueue::AddTexelBuffer(vk::DescriptorSet target, u8 binding,
vk::BufferView buffer_view) {
if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
Flush();
}
auto& buffer_info = descriptor_infos[descriptor_write_end].buffer_view;
buffer_info = buffer_view;
descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{
.dstSet = target,
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_info,
};
}
} // namespace Vulkan

View file

@ -0,0 +1,51 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
struct DescriptorInfoUnion {
DescriptorInfoUnion() {}
union {
vk::DescriptorImageInfo image_info;
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
};
};
class DescriptorUpdateQueue {
public:
explicit DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max = 2048);
~DescriptorUpdateQueue() = default;
void Flush();
void AddStorageImage(vk::DescriptorSet target, u8 binding, vk::ImageView image_view,
vk::ImageLayout image_layout = vk::ImageLayout::eGeneral);
void AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index,
vk::ImageView image_view, vk::Sampler sampler,
vk::ImageLayout imageLayout = vk::ImageLayout::eGeneral);
void AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer, vk::DeviceSize offset,
vk::DeviceSize size = VK_WHOLE_SIZE,
vk::DescriptorType type = vk::DescriptorType::eUniformBufferDynamic);
void AddTexelBuffer(vk::DescriptorSet target, u8 binding, vk::BufferView buffer_view);
private:
const vk::Device device;
const u32 descriptor_write_max;
std::unique_ptr<DescriptorInfoUnion[]> descriptor_infos;
std::unique_ptr<vk::WriteDescriptorSet[]> descriptor_writes;
u32 descriptor_write_end = 0;
};
} // namespace Vulkan

View file

@ -0,0 +1,271 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <span>
#include <boost/container/static_vector.hpp>
#include <fmt/format.h>
#include <fmt/ranges.h>
#include "common/assert.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include <vk_mem_alloc.h>
namespace Vulkan {
namespace {
std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
const std::vector extensions = physical.enumerateDeviceExtensionProperties();
std::vector<std::string> supported_extensions;
supported_extensions.reserve(extensions.size());
for (const auto& extension : extensions) {
supported_extensions.emplace_back(extension.extensionName.data());
}
return supported_extensions;
}
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
}
} // Anonymous namespace
Instance::Instance(bool enable_validation, bool dump_command_buffers)
: instance{CreateInstance(dl, Frontend::WindowSystemType::Headless, enable_validation,
dump_command_buffers)},
physical_devices{instance->enumeratePhysicalDevices()} {}
Instance::Instance(Frontend::WindowSDL& window, u32 physical_device_index)
: instance{CreateInstance(dl, window.getWindowInfo().type, true, false)},
debug_callback{CreateDebugCallback(*instance)}, physical_devices{
instance->enumeratePhysicalDevices()} {
const std::size_t num_physical_devices = static_cast<u16>(physical_devices.size());
ASSERT_MSG(physical_device_index < num_physical_devices,
"Invalid physical device index {} provided when only {} devices exist",
physical_device_index, num_physical_devices);
physical_device = physical_devices[physical_device_index];
available_extensions = GetSupportedExtensions(physical_device);
properties = physical_device.getProperties();
if (properties.apiVersion < TargetVulkanApiVersion) {
throw std::runtime_error(fmt::format(
"Vulkan {}.{} is required, but only {}.{} is supported by device!",
VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion)));
}
CollectDeviceParameters();
CreateDevice();
CollectToolingInfo();
}
Instance::~Instance() {
vmaDestroyAllocator(allocator);
}
std::string Instance::GetDriverVersionName() {
// Extracted from
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
const u32 version = properties.driverVersion;
if (driver_id == vk::DriverId::eNvidiaProprietary) {
const u32 major = (version >> 22) & 0x3ff;
const u32 minor = (version >> 14) & 0x0ff;
const u32 secondary = (version >> 6) & 0x0ff;
const u32 tertiary = version & 0x003f;
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
}
if (driver_id == vk::DriverId::eIntelProprietaryWindows) {
const u32 major = version >> 14;
const u32 minor = version & 0x3fff;
return fmt::format("{}.{}", major, minor);
}
return GetReadableVersion(version);
}
bool Instance::CreateDevice() {
const vk::StructureChain feature_chain = physical_device.getFeatures2<
vk::PhysicalDeviceFeatures2, vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR,
vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT,
vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT,
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT,
vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
const vk::StructureChain properties_chain =
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
features = feature_chain.get().features;
if (available_extensions.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
boost::container::static_vector<const char*, 13> enabled_extensions;
const auto add_extension = [&](std::string_view extension) -> bool {
const auto result =
std::find_if(available_extensions.begin(), available_extensions.end(),
[&](const std::string& name) { return name == extension; });
if (result != available_extensions.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", extension);
enabled_extensions.push_back(extension.data());
return true;
}
LOG_WARNING(Render_Vulkan, "Extension {} unavailable.", extension);
return false;
};
add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
image_format_list = add_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
const auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) {
LOG_CRITICAL(Render_Vulkan, "Physical device reported no queues.");
return false;
}
bool graphics_queue_found = false;
for (std::size_t i = 0; i < family_properties.size(); i++) {
const u32 index = static_cast<u32>(i);
if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) {
queue_family_index = index;
graphics_queue_found = true;
}
}
if (!graphics_queue_found) {
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
return false;
}
static constexpr std::array<f32, 1> queue_priorities = {1.0f};
const vk::DeviceQueueCreateInfo queue_info = {
.queueFamilyIndex = queue_family_index,
.queueCount = static_cast<u32>(queue_priorities.size()),
.pQueuePriorities = queue_priorities.data(),
};
vk::StructureChain device_chain = {
vk::DeviceCreateInfo{
.queueCreateInfoCount = 1u,
.pQueueCreateInfos = &queue_info,
.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()),
.ppEnabledExtensionNames = enabled_extensions.data(),
},
vk::PhysicalDeviceFeatures2{
.features{
.robustBufferAccess = features.robustBufferAccess,
.geometryShader = features.geometryShader,
.logicOp = features.logicOp,
.samplerAnisotropy = features.samplerAnisotropy,
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
.shaderClipDistance = features.shaderClipDistance,
},
},
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR{
.timelineSemaphore = true,
},
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{
.extendedDynamicState = true,
},
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{
.extendedDynamicState2 = true,
.extendedDynamicState2LogicOp = true,
},
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{},
vk::PhysicalDeviceCustomBorderColorFeaturesEXT{
.customBorderColors = true,
.customBorderColorWithoutFormat = true,
},
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{
.indexTypeUint8 = true,
},
};
try {
device = physical_device.createDeviceUnique(device_chain.get());
} catch (vk::ExtensionNotPresentError& err) {
LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what());
return false;
}
VULKAN_HPP_DEFAULT_DISPATCHER.init(*device);
graphics_queue = device->getQueue(queue_family_index, 0);
present_queue = device->getQueue(queue_family_index, 0);
CreateAllocator();
return true;
}
void Instance::CreateAllocator() {
const VmaVulkanFunctions functions = {
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr,
};
const VmaAllocatorCreateInfo allocator_info = {
.physicalDevice = physical_device,
.device = *device,
.pVulkanFunctions = &functions,
.instance = *instance,
.vulkanApiVersion = TargetVulkanApiVersion,
};
const VkResult result = vmaCreateAllocator(&allocator_info, &allocator);
if (result != VK_SUCCESS) {
UNREACHABLE_MSG("Failed to initialize VMA with error {}",
vk::to_string(vk::Result{result}));
}
}
void Instance::CollectDeviceParameters() {
const vk::StructureChain property_chain =
physical_device
.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceDriverProperties>();
const vk::PhysicalDeviceDriverProperties driver =
property_chain.get<vk::PhysicalDeviceDriverProperties>();
driver_id = driver.driverID;
vendor_name = driver.driverName.data();
const std::string model_name{GetModelName()};
const std::string driver_version = GetDriverVersionName();
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions);
}
void Instance::CollectToolingInfo() {
if (!tooling_info) {
return;
}
const auto tools = physical_device.getToolPropertiesEXT();
for (const vk::PhysicalDeviceToolProperties& tool : tools) {
const std::string_view name = tool.name;
LOG_INFO(Render_Vulkan, "Attached debugging tool: {}", name);
has_renderdoc = has_renderdoc || name == "RenderDoc";
has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
}
}
} // namespace Vulkan

View file

@ -0,0 +1,228 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include "video_core/renderer_vulkan/vk_platform.h"
namespace Frontend {
class WindowSDL;
}
VK_DEFINE_HANDLE(VmaAllocator)
namespace Vulkan {
class Instance {
public:
explicit Instance(bool validation = false, bool dump_command_buffers = false);
explicit Instance(Frontend::WindowSDL& window, u32 physical_device_index);
~Instance();
/// Returns a formatted string for the driver version
std::string GetDriverVersionName();
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
return *instance;
}
/// Returns the current physical device
vk::PhysicalDevice GetPhysicalDevice() const {
return physical_device;
}
/// Returns the Vulkan device
vk::Device GetDevice() const {
return *device;
}
/// Returns the VMA allocator handle
VmaAllocator GetAllocator() const {
return allocator;
}
/// Returns a list of the available physical devices
std::span<const vk::PhysicalDevice> GetPhysicalDevices() const {
return physical_devices;
}
/// Retrieve queue information
u32 GetGraphicsQueueFamilyIndex() const {
return queue_family_index;
}
u32 GetPresentQueueFamilyIndex() const {
return queue_family_index;
}
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
}
/// Returns true if anisotropic filtering is supported
bool IsAnisotropicFilteringSupported() const {
return features.samplerAnisotropy;
}
/// Returns true when VK_EXT_custom_border_color is supported
bool IsCustomBorderColorSupported() const {
return custom_border_color;
}
/// Returns true when VK_EXT_index_type_uint8 is supported
bool IsIndexTypeUint8Supported() const {
return index_type_uint8;
}
/// Returns true when VK_EXT_fragment_shader_interlock is supported
bool IsFragmentShaderInterlockSupported() const {
return fragment_shader_interlock;
}
/// Returns true when VK_KHR_image_format_list is supported
bool IsImageFormatListSupported() const {
return image_format_list;
}
/// Returns true when VK_EXT_pipeline_creation_cache_control is supported
bool IsPipelineCreationCacheControlSupported() const {
return pipeline_creation_cache_control;
}
/// Returns true when VK_EXT_shader_stencil_export is supported
bool IsShaderStencilExportSupported() const {
return shader_stencil_export;
}
/// Returns true when VK_EXT_external_memory_host is supported
bool IsExternalMemoryHostSupported() const {
return external_memory_host;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;
}
/// Returns the device ID of the physical device
u32 GetDeviceID() const {
return properties.deviceID;
}
/// Returns the driver ID.
vk::DriverId GetDriverID() const {
return driver_id;
}
/// Returns the current driver version provided in Vulkan-formatted version numbers.
u32 GetDriverVersion() const {
return properties.driverVersion;
}
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
u32 ApiVersion() const {
return properties.apiVersion;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
}
/// Returns the list of available extensions.
std::span<const std::string> GetAvailableExtensions() const {
return available_extensions;
}
/// Returns the device name.
std::string_view GetModelName() const {
return properties.deviceName;
}
/// Returns the pipeline cache unique identifier
const auto GetPipelineCacheUUID() const {
return properties.pipelineCacheUUID;
}
/// Returns the minimum required alignment for uniforms
vk::DeviceSize UniformMinAlignment() const {
return properties.limits.minUniformBufferOffsetAlignment;
}
/// Returns the minimum alignemt required for accessing host-mapped device memory
vk::DeviceSize NonCoherentAtomSize() const {
return properties.limits.nonCoherentAtomSize;
}
/// Returns the maximum supported elements in a texel buffer
u32 MaxTexelBufferElements() const {
return properties.limits.maxTexelBufferElements;
}
/// Returns true if shaders can declare the ClipDistance attribute
bool IsShaderClipDistanceSupported() const {
return features.shaderClipDistance;
}
/// Returns the minimum imported host pointer alignment
u64 GetMinImportedHostPointerAlignment() const {
return min_imported_host_pointer_alignment;
}
private:
/// Creates the logical device opportunistically enabling extensions
bool CreateDevice();
/// Creates the VMA allocator handle
void CreateAllocator();
/// Collects telemetry information from the device.
void CollectDeviceParameters();
void CollectToolingInfo();
private:
vk::DynamicLoader dl;
vk::UniqueInstance instance;
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;
vk::PhysicalDeviceProperties properties;
vk::PhysicalDeviceFeatures features;
vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback;
std::string vendor_name;
VmaAllocator allocator{};
vk::Queue present_queue;
vk::Queue graphics_queue;
std::vector<vk::PhysicalDevice> physical_devices;
std::vector<std::string> available_extensions;
u32 queue_family_index{0};
bool image_view_reinterpretation{true};
bool timeline_semaphores{};
bool custom_border_color{};
bool index_type_uint8{};
bool fragment_shader_interlock{};
bool image_format_list{};
bool pipeline_creation_cache_control{};
bool fragment_shader_barycentric{};
bool shader_stencil_export{};
bool external_memory_host{};
u64 min_imported_host_pointer_alignment{};
bool tooling_info{};
bool debug_utils_supported{};
bool has_nsight_graphics{};
bool has_renderdoc{};
};
} // namespace Vulkan

View file

@ -0,0 +1,105 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <limits>
#include <mutex>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
namespace Vulkan {
constexpr u64 WAIT_TIMEOUT = std::numeric_limits<u64>::max();
MasterSemaphore::MasterSemaphore(const Instance& instance_) : instance{instance_} {
const vk::StructureChain semaphore_chain = {
vk::SemaphoreCreateInfo{},
vk::SemaphoreTypeCreateInfo{
.semaphoreType = vk::SemaphoreType::eTimeline,
.initialValue = 0,
},
};
semaphore = instance.GetDevice().createSemaphoreUnique(semaphore_chain.get());
}
MasterSemaphore::~MasterSemaphore() = default;
void MasterSemaphore::Refresh() {
u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = instance.GetDevice().getSemaphoreCounterValue(*semaphore);
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}
void MasterSemaphore::Wait(u64 tick) {
// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}
// Update the GPU tick and try again
Refresh();
if (IsFree(tick)) {
return;
}
// If none of the above is hit, fallback to a regular wait
const vk::SemaphoreWaitInfo wait_info = {
.semaphoreCount = 1,
.pSemaphores = &semaphore.get(),
.pValues = &tick,
};
while (instance.GetDevice().waitSemaphores(&wait_info, WAIT_TIMEOUT) != vk::Result::eSuccess) {
}
Refresh();
}
void MasterSemaphore::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
u64 signal_value) {
cmdbuf.end();
const u32 num_signal_semaphores = signal ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{Handle(), signal};
const u32 num_wait_semaphores = wait ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{Handle(), wait};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::TimelineSemaphoreSubmitInfo timeline_si = {
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
try {
instance.GetGraphicsQueue().submit(submit_info);
} catch (vk::DeviceLostError& err) {
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
}
}
} // namespace Vulkan

View file

@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <condition_variable>
#include <thread>
#include <queue>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class Scheduler;
class MasterSemaphore {
public:
explicit MasterSemaphore(const Instance& instance_);
~MasterSemaphore();
[[nodiscard]] u64 CurrentTick() const noexcept {
return current_tick.load(std::memory_order_acquire);
}
[[nodiscard]] u64 KnownGpuTick() const noexcept {
return gpu_tick.load(std::memory_order_acquire);
}
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;
}
[[nodiscard]] u64 NextTick() noexcept {
return current_tick.fetch_add(1, std::memory_order_release);
}
[[nodiscard]] vk::Semaphore Handle() const noexcept {
return semaphore.get();
}
/// Refresh the known GPU tick
void Refresh();
/// Waits for a tick to be hit on the GPU
void Wait(u64 tick);
/// Submits the provided command buffer for execution
void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
u64 signal_value);
protected:
const Instance& instance;
vk::UniqueSemaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
};
} // namespace Vulkan

View file

@ -0,0 +1,232 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// Include the vulkan platform specific header
#if defined(ANDROID)
#define VK_USE_PLATFORM_ANDROID_KHR
#elif defined(WIN32)
#define VK_USE_PLATFORM_WIN32_KHR
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_METAL_EXT
#else
#define VK_USE_PLATFORM_WAYLAND_KHR
#define VK_USE_PLATFORM_XLIB_KHR
#endif
#include <vector>
#include "common/assert.h"
#include "common/logging/log.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/vk_platform.h"
namespace Vulkan {
static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* callback_data, void* user_data) {
switch (static_cast<u32>(callback_data->messageIdNumber)) {
case 0x609a13b: // Vertex attribute at location not consumed by shader
case 0xc81ad50e:
return VK_FALSE;
default:
break;
}
Common::Log::Level level{};
switch (severity) {
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:
level = Common::Log::Level::Error;
break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT:
level = Common::Log::Level::Info;
break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT:
level = Common::Log::Level::Debug;
break;
default:
level = Common::Log::Level::Info;
}
LOG_GENERIC(Common::Log::Class::Render_Vulkan, level, "{}: {}",
callback_data->pMessageIdName ? callback_data->pMessageIdName : "<null>",
callback_data->pMessage ? callback_data->pMessage : "<null>");
return VK_FALSE;
}
vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::WindowSDL& emu_window) {
const auto& window_info = emu_window.getWindowInfo();
vk::SurfaceKHR surface{};
#if defined(VK_USE_PLATFORM_WIN32_KHR)
if (window_info.type == Frontend::WindowSystemType::Windows) {
const vk::Win32SurfaceCreateInfoKHR win32_ci = {
.hinstance = nullptr,
.hwnd = static_cast<HWND>(window_info.render_surface),
};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
UNREACHABLE();
}
}
#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR)
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci = {
.dpy = static_cast<Display*>(window_info.display_connection),
.window = reinterpret_cast<Window>(window_info.render_surface),
};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
} else if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci = {
.display = static_cast<wl_display*>(window_info.display_connection),
.surface = static_cast<wl_surface*>(window_info.render_surface),
};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) !=
vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
UNREACHABLE();
}
return surface;
}
std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window_type,
bool enable_debug_utils) {
const auto properties = vk::enumerateInstanceExtensionProperties();
if (properties.empty()) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return {};
}
// Add the windowing system specific extension
std::vector<const char*> extensions;
extensions.reserve(7);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#if defined(VK_USE_PLATFORM_WIN32_KHR)
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR)
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
// Sanitize extension list
std::erase_if(extensions, [&](const char* extension) -> bool {
const auto it =
std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) {
return std::strcmp(extension, prop.extensionName) == 0;
});
if (it == properties.end()) {
LOG_INFO(Render_Vulkan, "Candidate instance extension {} is not available", extension);
return true;
}
return false;
});
return extensions;
}
vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemType window_type,
bool enable_validation, bool dump_command_buffers) {
auto vkGetInstanceProcAddr =
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
const u32 available_version = VULKAN_HPP_DEFAULT_DISPATCHER.vkEnumerateInstanceVersion
? vk::enumerateInstanceVersion()
: VK_API_VERSION_1_0;
if (available_version < TargetVulkanApiVersion) {
throw std::runtime_error(fmt::format(
"Vulkan {}.{} is required, but only {}.{} is supported by instance!",
VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version)));
}
const auto extensions = GetInstanceExtensions(window_type, enable_validation);
const vk::ApplicationInfo application_info = {
.pApplicationName = "Citra",
.applicationVersion = VK_MAKE_VERSION(1, 0, 0),
.pEngineName = "Citra Vulkan",
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = available_version,
};
u32 num_layers = 0;
std::array<const char*, 2> layers;
if (enable_validation) {
layers[num_layers++] = "VK_LAYER_KHRONOS_validation";
}
if (dump_command_buffers) {
layers[num_layers++] = "VK_LAYER_LUNARG_api_dump";
}
vk::InstanceCreateInfo instance_ci = {
.pApplicationInfo = &application_info,
.enabledLayerCount = num_layers,
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = static_cast<u32>(extensions.size()),
.ppEnabledExtensionNames = extensions.data(),
};
auto instance = vk::createInstanceUnique(instance_ci);
VULKAN_HPP_DEFAULT_DISPATCHER.init(*instance);
return instance;
}
vk::UniqueDebugUtilsMessengerEXT CreateDebugCallback(vk::Instance instance) {
const vk::DebugUtilsMessengerCreateInfoEXT msg_ci = {
.messageSeverity = vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo |
vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning |
vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose,
.messageType = vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral |
vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
vk::DebugUtilsMessageTypeFlagBitsEXT::eDeviceAddressBinding |
vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
.pfnUserCallback = DebugUtilsCallback,
};
return instance.createDebugUtilsMessengerEXTUnique(msg_ci);
}
} // namespace Vulkan

View file

@ -0,0 +1,49 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <variant>
#include <fmt/format.h>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Frontend {
enum class WindowSystemType : u8;
class WindowSDL;
} // namespace Frontend
namespace Vulkan {
constexpr u32 TargetVulkanApiVersion = VK_API_VERSION_1_3;
vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::WindowSDL& emu_window);
vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemType window_type,
bool enable_validation, bool dump_command_buffers);
vk::UniqueDebugUtilsMessengerEXT CreateDebugCallback(vk::Instance instance);
template <typename T>
concept VulkanHandleType = vk::isVulkanHandleType<T>::value;
template <VulkanHandleType HandleType>
void SetObjectName(vk::Device device, const HandleType& handle, std::string_view debug_name) {
const vk::DebugUtilsObjectNameInfoEXT name_info = {
.objectType = HandleType::objectType,
.objectHandle = reinterpret_cast<u64>(static_cast<typename HandleType::NativeType>(handle)),
.pObjectName = debug_name.data(),
};
device.setDebugUtilsObjectNameEXT(name_info);
}
template <VulkanHandleType HandleType, typename... Args>
void SetObjectName(vk::Device device, const HandleType& handle, const char* format,
const Args&... args) {
const std::string debug_name = fmt::vformat(format, fmt::make_format_args(args...));
SetObjectName(device, handle, debug_name);
}
} // namespace Vulkan

View file

@ -0,0 +1,190 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstddef>
#include <optional>
#include <unordered_map>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, std::size_t grow_step_)
: master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
std::size_t ResourcePool::CommitResource() {
u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](std::size_t begin,
std::size_t end) -> std::optional<std::size_t> {
for (std::size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
return std::nullopt;
};
// Try to find a free resource from the hinted position to the end.
auto found = search(hint_iterator, ticks.size());
if (!found) {
// Refresh semaphore to query updated results
master_semaphore->Refresh();
gpu_tick = master_semaphore->KnownGpuTick();
found = search(hint_iterator, ticks.size());
}
if (!found) {
// Search from beginning to the hinted position.
found = search(0, hint_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const std::size_t free_resource = ManageOverflow();
ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}
// Free iterator is hinted to the resource after the one that's been commited.
hint_iterator = (*found + 1) % ticks.size();
return *found;
}
std::size_t ResourcePool::ManageOverflow() {
const std::size_t old_capacity = ticks.size();
ticks.resize(old_capacity + grow_step);
Allocate(old_capacity, old_capacity + grow_step);
return old_capacity;
}
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 4;
CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore)
: ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {
const vk::CommandPoolCreateInfo pool_create_info = {
.flags = vk::CommandPoolCreateFlagBits::eTransient |
vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
};
const vk::Device device = instance.GetDevice();
cmd_pool = device.createCommandPoolUnique(pool_create_info);
if (instance.HasDebuggingToolAttached()) {
SetObjectName(device, *cmd_pool, "CommandPool");
}
}
CommandPool::~CommandPool() = default;
void CommandPool::Allocate(std::size_t begin, std::size_t end) {
cmd_buffers.resize(end);
const vk::CommandBufferAllocateInfo buffer_alloc_info = {
.commandPool = *cmd_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = COMMAND_BUFFER_POOL_SIZE,
};
const vk::Device device = instance.GetDevice();
const auto result =
device.allocateCommandBuffers(&buffer_alloc_info, cmd_buffers.data() + begin);
ASSERT(result == vk::Result::eSuccess);
if (instance.HasDebuggingToolAttached()) {
for (std::size_t i = begin; i < end; ++i) {
SetObjectName(device, cmd_buffers[i], "CommandPool: Command Buffer {}", i);
}
}
}
vk::CommandBuffer CommandPool::Commit() {
const std::size_t index = CommitResource();
return cmd_buffers[index];
}
constexpr u32 DESCRIPTOR_SET_BATCH = 32;
DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
std::span<const vk::DescriptorSetLayoutBinding> bindings,
u32 descriptor_heap_count_)
: ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()},
descriptor_heap_count{descriptor_heap_count_} {
// Create descriptor set layout.
const vk::DescriptorSetLayoutCreateInfo layout_ci = {
.bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(),
};
descriptor_set_layout = device.createDescriptorSetLayoutUnique(layout_ci);
if (instance.HasDebuggingToolAttached()) {
SetObjectName(device, *descriptor_set_layout, "DescriptorSetLayout");
}
// Build descriptor set pool counts.
std::unordered_map<vk::DescriptorType, u16> descriptor_type_counts;
for (const auto& binding : bindings) {
descriptor_type_counts[binding.descriptorType] += binding.descriptorCount;
}
for (const auto& [type, count] : descriptor_type_counts) {
auto& pool_size = pool_sizes.emplace_back();
pool_size.descriptorCount = count * descriptor_heap_count;
pool_size.type = type;
}
// Create descriptor pool
AppendDescriptorPool();
}
DescriptorHeap::~DescriptorHeap() = default;
void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) {
ASSERT(end - begin == DESCRIPTOR_SET_BATCH);
descriptor_sets.resize(end);
hashes.resize(end);
std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_BATCH> layouts;
layouts.fill(*descriptor_set_layout);
u32 current_pool = 0;
vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = *pools[current_pool],
.descriptorSetCount = DESCRIPTOR_SET_BATCH,
.pSetLayouts = layouts.data(),
};
// Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new
// one.
while (true) {
const auto result =
device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin);
if (result == vk::Result::eSuccess) {
break;
}
if (result == vk::Result::eErrorOutOfPoolMemory) {
current_pool++;
if (current_pool == pools.size()) {
LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
AppendDescriptorPool();
}
alloc_info.descriptorPool = *pools[current_pool];
}
}
}
vk::DescriptorSet DescriptorHeap::Commit() {
const std::size_t index = CommitResource();
return descriptor_sets[index];
}
void DescriptorHeap::AppendDescriptorPool() {
const vk::DescriptorPoolCreateInfo pool_info = {
.flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind,
.maxSets = descriptor_heap_count,
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data(),
};
auto& pool = pools.emplace_back();
pool = device.createDescriptorPoolUnique(pool_info);
}
} // namespace Vulkan

View file

@ -0,0 +1,93 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include <tsl/robin_map.h>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class MasterSemaphore;
/**
* Handles a pool of resources protected by fences. Manages resource overflow allocating more
* resources.
*/
class ResourcePool {
public:
explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore* master_semaphore, std::size_t grow_step);
virtual ~ResourcePool() = default;
ResourcePool& operator=(ResourcePool&&) noexcept = default;
ResourcePool(ResourcePool&&) noexcept = default;
ResourcePool& operator=(const ResourcePool&) = default;
ResourcePool(const ResourcePool&) = default;
protected:
std::size_t CommitResource();
/// Called when a chunk of resources have to be allocated.
virtual void Allocate(std::size_t begin, std::size_t end) = 0;
private:
/// Manages pool overflow allocating new resources.
std::size_t ManageOverflow();
protected:
MasterSemaphore* master_semaphore{nullptr};
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
std::size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
};
class CommandPool final : public ResourcePool {
public:
explicit CommandPool(const Instance& instance, MasterSemaphore* master_semaphore);
~CommandPool() override;
void Allocate(std::size_t begin, std::size_t end) override;
vk::CommandBuffer Commit();
private:
const Instance& instance;
vk::UniqueCommandPool cmd_pool;
std::vector<vk::CommandBuffer> cmd_buffers;
};
class DescriptorHeap final : public ResourcePool {
public:
explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
std::span<const vk::DescriptorSetLayoutBinding> bindings,
u32 descriptor_heap_count = 1024);
~DescriptorHeap() override;
const vk::DescriptorSetLayout& Layout() const {
return *descriptor_set_layout;
}
void Allocate(std::size_t begin, std::size_t end) override;
vk::DescriptorSet Commit();
private:
void AppendDescriptorPool();
private:
vk::Device device;
vk::UniqueDescriptorSetLayout descriptor_set_layout;
u32 descriptor_heap_count;
std::vector<vk::DescriptorPoolSize> pool_sizes;
std::vector<vk::UniqueDescriptorPool> pools;
std::vector<vk::DescriptorSet> descriptor_sets;
std::vector<std::size_t> hashes;
};
} // namespace Vulkan

View file

@ -0,0 +1,181 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
#include <utility>
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(cmdbuf);
command->~Command();
command = next;
}
submit = false;
command_offset = 0;
first = nullptr;
last = nullptr;
}
Scheduler::Scheduler(const Instance& instance)
: master_semaphore{instance}, command_pool{instance, &master_semaphore}, use_worker_thread{
true} {
AllocateWorkerCommandBuffers();
if (use_worker_thread) {
AcquireNewChunk();
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}
}
Scheduler::~Scheduler() = default;
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
// When flushing, we only send data to the worker thread; no waiting is necessary.
SubmitExecution(signal, wait);
}
void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
// When finishing, we need to wait for the submission to have executed on the device.
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal, wait);
Wait(presubmit_tick);
}
void Scheduler::WaitWorker() {
if (!use_worker_thread) {
return;
}
DispatchWork();
// Ensure the queue is drained.
{
std::unique_lock ql{queue_mutex};
event_cv.wait(ql, [this] { return work_queue.empty(); });
}
// Now wait for execution to finish.
// This needs to be done in the same order as WorkerThread.
std::scoped_lock el{execution_mutex};
}
void Scheduler::Wait(u64 tick) {
if (tick >= master_semaphore.CurrentTick()) {
// Make sure we are not waiting for the current tick without signalling
Flush();
}
master_semaphore.Wait(tick);
}
void Scheduler::DispatchWork() {
if (!use_worker_thread || chunk->Empty()) {
return;
}
{
std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
event_cv.notify_all();
AcquireNewChunk();
}
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("VulkanWorker");
const auto TryPopQueue{[this](auto& work) -> bool {
if (work_queue.empty()) {
return false;
}
work = std::move(work_queue.front());
work_queue.pop();
event_cv.notify_all();
return true;
}};
while (!stop_token.stop_requested()) {
std::unique_ptr<CommandChunk> work;
{
std::unique_lock lk{queue_mutex};
// Wait for work.
event_cv.wait(lk, stop_token, [&] { return TryPopQueue(work); });
// If we've been asked to stop, we're done.
if (stop_token.stop_requested()) {
return;
}
// Exchange lock ownership so that we take the execution lock before
// the queue lock goes out of scope. This allows us to force execution
// to complete in the next step.
std::exchange(lk, std::unique_lock{execution_mutex});
// Perform the work, tracking whether the chunk was a submission
// before executing.
const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
// If the chunk was a submission, reallocate the command buffer.
if (has_submit) {
AllocateWorkerCommandBuffers();
}
}
{
std::scoped_lock rl{reserve_mutex};
// Recycle the chunk back to the reserve.
chunk_reserve.emplace_back(std::move(work));
}
}
}
void Scheduler::AllocateWorkerCommandBuffers() {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
};
current_cmdbuf = command_pool.Commit();
current_cmdbuf.begin(begin_info);
}
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
const u64 signal_value = master_semaphore.NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
std::scoped_lock lock{submit_mutex};
master_semaphore.SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value);
});
master_semaphore.Refresh();
if (!use_worker_thread) {
AllocateWorkerCommandBuffers();
} else {
chunk->MarkSubmit();
DispatchWork();
}
}
void Scheduler::AcquireNewChunk() {
std::scoped_lock lock{reserve_mutex};
if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
chunk = std::move(chunk_reserve.back());
chunk_reserve.pop_back();
}
} // namespace Vulkan

View file

@ -0,0 +1,192 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <functional>
#include <memory>
#include <thread>
#include <utility>
#include <queue>
#include "common/alignment.h"
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
class Instance;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Instance& instance);
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick);
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Records the command to the current chunk.
template <typename T>
void Record(T&& command) {
if (chunk->Record(command)) {
return;
}
DispatchWork();
(void)chunk->Record(command);
}
/// Registers a callback to perform on queue submission.
void RegisterOnSubmit(std::function<void()>&& func) {
on_submit = std::move(func);
}
/// Registers a callback to perform on queue submission.
void RegisterOnDispatch(std::function<void()>&& func) {
on_dispatch = std::move(func);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore.CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore.IsFree(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore* GetMasterSemaphore() noexcept {
return &master_semaphore;
}
std::mutex submit_mutex;
private:
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override {
command(cmdbuf);
}
private:
T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf);
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
recorded_counts++;
command_offset = Common::alignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* const current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
void MarkSubmit() {
submit = true;
}
bool Empty() const {
return recorded_counts == 0;
}
bool HasSubmit() const {
return submit;
}
private:
Command* first = nullptr;
Command* last = nullptr;
std::size_t recorded_counts = 0;
std::size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
};
private:
void WorkerThread(std::stop_token stop_token);
void AllocateWorkerCommandBuffers();
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
void AcquireNewChunk();
private:
MasterSemaphore master_semaphore;
CommandPool command_pool;
std::unique_ptr<CommandChunk> chunk;
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
vk::CommandBuffer current_cmdbuf;
std::function<void()> on_submit;
std::function<void()> on_dispatch;
std::mutex execution_mutex;
std::mutex reserve_mutex;
std::mutex queue_mutex;
std::condition_variable_any event_cv;
std::jthread worker_thread;
bool use_worker_thread;
};
} // namespace Vulkan

View file

@ -0,0 +1,230 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
#include <SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
#include <glslang/Public/ShaderLang.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
namespace {
constexpr TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits =
TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
},
};
EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
switch (stage) {
case vk::ShaderStageFlagBits::eVertex:
return EShLanguage::EShLangVertex;
case vk::ShaderStageFlagBits::eGeometry:
return EShLanguage::EShLangGeometry;
case vk::ShaderStageFlagBits::eFragment:
return EShLanguage::EShLangFragment;
case vk::ShaderStageFlagBits::eCompute:
return EShLanguage::EShLangCompute;
default:
UNREACHABLE_MSG("Unkown shader stage {}", vk::to_string(stage));
}
return EShLanguage::EShLangVertex;
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
}
} // Anonymous namespace
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device) {
if (!InitializeCompiler()) {
return {};
}
EProfile profile = ECoreProfile;
EShMessages messages =
static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
EShLanguage lang = ToEshShaderStage(stage);
const int default_version = 450;
const char* pass_source_code = code.data();
int pass_source_code_length = static_cast<int>(code.size());
auto shader = std::make_unique<glslang::TShader>(lang);
shader->setEnvTarget(glslang::EShTargetSpv,
glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
glslang::TShader::ForbidIncluder includer;
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages,
includer)) [[unlikely]] {
LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
shader->getInfoDebugLog());
LOG_INFO(Render_Vulkan, "Shader Source:\n{}", code);
return {};
}
// Even though there's only a single shader, we still need to link it to generate SPV
auto program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_INFO(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(),
program->getInfoDebugLog());
return {};
}
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Enable optimizations on the generated SPIR-V code.
options.disableOptimizer = false;
options.validate = false;
options.optimizeSize = true;
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
return CompileSPV(out_code, device);
}
vk::ShaderModule CompileSPV(std::span<const u32> code, vk::Device device) {
const vk::ShaderModuleCreateInfo shader_info = {
.codeSize = code.size() * sizeof(u32),
.pCode = code.data(),
};
try {
return device.createShaderModule(shader_info);
} catch (vk::SystemError& err) {
UNREACHABLE_MSG("{}", err.what());
}
return {};
}
} // namespace Vulkan

View file

@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
/**
* @brief Creates a vulkan shader module from GLSL by converting it to SPIR-V using glslang.
* @param code The string containing GLSL code.
* @param stage The pipeline stage the shader will be used in.
* @param device The vulkan device handle.
*/
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device);
/**
* @brief Creates a vulkan shader module from SPIR-V bytecode.
* @param code The SPIR-V bytecode data.
* @param device The vulkan device handle
*/
vk::ShaderModule CompileSPV(std::span<const u32> code, vk::Device device);
} // namespace Vulkan

View file

@ -0,0 +1,234 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
namespace {
std::string_view BufferTypeName(BufferType type) {
switch (type) {
case BufferType::Upload:
return "Upload";
case BufferType::Download:
return "Download";
case BufferType::Stream:
return "Stream";
default:
return "Invalid";
}
}
vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) {
switch (type) {
case BufferType::Upload:
return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
case BufferType::Download:
return vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached;
case BufferType::Stream:
return vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent;
default:
UNREACHABLE_MSG("Unknown buffer type {}", static_cast<u32>(type));
return vk::MemoryPropertyFlagBits::eHostVisible;
}
}
static std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
vk::MemoryPropertyFlags wanted) {
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
const auto flags = properties.memoryTypes[i].propertyFlags;
if ((flags & wanted) == wanted) {
return i;
}
}
return std::nullopt;
}
/// Get the preferred host visible memory type.
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
vk::MemoryPropertyFlags flags = MakePropertyFlags(type);
std::optional preferred_type = FindMemoryType(properties, flags);
constexpr std::array remove_flags = {
vk::MemoryPropertyFlagBits::eHostCached,
vk::MemoryPropertyFlagBits::eHostCoherent,
};
for (u32 i = 0; i < remove_flags.size() && !preferred_type; i++) {
flags &= ~remove_flags[i];
preferred_type = FindMemoryType(properties, flags);
}
ASSERT_MSG(preferred_type, "No suitable memory type found");
return preferred_type.value();
}
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
} // Anonymous namespace
StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
vk::BufferUsageFlags usage_, u64 size, BufferType type_)
: instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()},
stream_buffer_size{size}, usage{usage_}, type{type_} {
CreateBuffers(size);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
StreamBuffer::~StreamBuffer() {
device.unmapMemory(memory);
device.destroyBuffer(buffer);
device.freeMemory(memory);
}
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
if (!is_coherent && type == BufferType::Stream) {
size = Common::alignUp(size, instance.NonCoherentAtomSize());
}
ASSERT(size <= stream_buffer_size);
mapped_size = size;
if (alignment > 0) {
offset = Common::alignUp(offset, alignment);
}
bool invalidate{false};
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidate = true;
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
}
const u64 mapped_upper_bound = offset + size;
WaitPendingOperations(mapped_upper_bound);
return std::make_tuple(mapped + offset, offset, invalidate);
}
void StreamBuffer::Commit(u64 size) {
if (!is_coherent && type == BufferType::Stream) {
size = Common::alignUp(size, instance.NonCoherentAtomSize());
}
ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
size);
const vk::MappedMemoryRange range = {
.memory = memory,
.offset = offset,
.size = size,
};
if (!is_coherent && type == BufferType::Download) {
device.invalidateMappedMemoryRanges(range);
} else if (!is_coherent) {
device.flushMappedMemoryRanges(range);
}
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
}
void StreamBuffer::CreateBuffers(u64 prefered_size) {
const vk::Device device = instance.GetDevice();
const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties, type);
const vk::MemoryType mem_type = memory_properties.memoryTypes[preferred_type];
const u32 preferred_heap = mem_type.heapIndex;
is_coherent =
static_cast<bool>(mem_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent);
// Substract from the preferred heap size some bytes to avoid getting out of memory.
const vk::DeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
// As per DXVK's example, using `heap_size / 2`
const vk::DeviceSize allocable_size = heap_size / 2;
buffer = device.createBuffer({
.size = std::min(prefered_size, allocable_size),
.usage = usage,
});
const auto requirements_chain =
device
.getBufferMemoryRequirements2<vk::MemoryRequirements2, vk::MemoryDedicatedRequirements>(
{.buffer = buffer});
const auto& requirements = requirements_chain.get<vk::MemoryRequirements2>();
const auto& dedicated_requirements = requirements_chain.get<vk::MemoryDedicatedRequirements>();
stream_buffer_size = static_cast<u64>(requirements.memoryRequirements.size);
LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KiB with flags {}",
BufferTypeName(type), stream_buffer_size / 1024,
vk::to_string(mem_type.propertyFlags));
if (dedicated_requirements.prefersDedicatedAllocation) {
vk::StructureChain<vk::MemoryAllocateInfo, vk::MemoryDedicatedAllocateInfo> alloc_chain =
{};
auto& alloc_info = alloc_chain.get<vk::MemoryAllocateInfo>();
alloc_info.allocationSize = requirements.memoryRequirements.size;
alloc_info.memoryTypeIndex = preferred_type;
auto& dedicated_alloc_info = alloc_chain.get<vk::MemoryDedicatedAllocateInfo>();
dedicated_alloc_info.buffer = buffer;
memory = device.allocateMemory(alloc_chain.get());
} else {
memory = device.allocateMemory({
.allocationSize = requirements.memoryRequirements.size,
.memoryTypeIndex = preferred_type,
});
}
device.bindBufferMemory(buffer, memory, 0);
mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
if (instance.HasDebuggingToolAttached()) {
SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
}
}
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
++wait_cursor;
}
}
} // namespace Vulkan

View file

@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <optional>
#include <span>
#include <tuple>
#include <vector>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
enum class BufferType : u32 {
Upload = 0,
Download = 1,
Stream = 2,
};
class Instance;
class Scheduler;
class StreamBuffer final {
static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
public:
explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
vk::BufferUsageFlags usage, u64 size,
BufferType type = BufferType::Stream);
~StreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);
vk::Buffer Handle() const noexcept {
return buffer;
}
private:
struct Watch {
u64 tick{};
u64 upper_bound{};
};
/// Creates Vulkan buffer handles committing the required the required memory.
void CreateBuffers(u64 prefered_size);
/// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
private:
const Instance& instance; ///< Vulkan instance.
Scheduler& scheduler; ///< Command scheduler.
vk::Device device;
vk::Buffer buffer; ///< Mapped buffer.
vk::DeviceMemory memory; ///< Memory allocation.
u8* mapped{}; ///< Pointer to the mapped memory
u64 stream_buffer_size{}; ///< Stream buffer size.
vk::BufferUsageFlags usage{};
BufferType type;
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
bool is_coherent{}; ///< True if the buffer is coherent
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan

View file

@ -0,0 +1,225 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <limits>
#include "common/assert.h"
#include "common/logging/log.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan {
Swapchain::Swapchain(const Instance& instance_, const Frontend::WindowSDL& window)
: instance{instance_}, surface{CreateSurface(instance.GetInstance(), window)} {
FindPresentFormat();
Create(window.getWidth(), window.getHeight(), surface);
}
Swapchain::~Swapchain() {
Destroy();
instance.GetInstance().destroySurfaceKHR(surface);
}
void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
width = width_;
height = height_;
surface = surface_;
needs_recreation = false;
Destroy();
SetSurfaceProperties();
const std::array queue_family_indices = {
instance.GetGraphicsQueueFamilyIndex(),
instance.GetPresentQueueFamilyIndex(),
};
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
const u32 queue_family_indices_count = exclusive ? 1u : 2u;
const vk::SharingMode sharing_mode =
exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent;
const vk::SwapchainCreateInfoKHR swapchain_info = {
.surface = surface,
.minImageCount = image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = vk::ImageUsageFlagBits::eColorAttachment |
vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst,
.imageSharingMode = sharing_mode,
.queueFamilyIndexCount = queue_family_indices_count,
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.compositeAlpha = composite_alpha,
.presentMode = vk::PresentModeKHR::eMailbox,
.clipped = true,
.oldSwapchain = nullptr,
};
try {
swapchain = instance.GetDevice().createSwapchainKHR(swapchain_info);
} catch (vk::SystemError& err) {
LOG_CRITICAL(Render_Vulkan, "{}", err.what());
UNREACHABLE();
}
SetupImages();
RefreshSemaphores();
}
bool Swapchain::AcquireNextImage() {
vk::Device device = instance.GetDevice();
vk::Result result =
device.acquireNextImageKHR(swapchain, std::numeric_limits<u64>::max(),
image_acquired[frame_index], VK_NULL_HANDLE, &image_index);
switch (result) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
case vk::Result::eErrorSurfaceLostKHR:
case vk::Result::eErrorOutOfDateKHR:
needs_recreation = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Swapchain acquire returned unknown result {}",
vk::to_string(result));
UNREACHABLE();
break;
}
return !needs_recreation;
}
void Swapchain::Present() {
if (needs_recreation) {
return;
}
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &present_ready[image_index],
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &image_index,
};
try {
[[maybe_unused]] vk::Result result = instance.GetPresentQueue().presentKHR(present_info);
} catch (vk::OutOfDateKHRError&) {
needs_recreation = true;
} catch (const vk::SystemError& err) {
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed {}", err.what());
UNREACHABLE();
}
frame_index = (frame_index + 1) % image_count;
}
void Swapchain::FindPresentFormat() {
const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);
// If there is a single undefined surface format, the device doesn't care, so we'll just use
// RGBA.
if (formats[0].format == vk::Format::eUndefined) {
surface_format.format = vk::Format::eR8G8B8A8Unorm;
surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
return;
}
// Try to find a suitable format.
for (const vk::SurfaceFormatKHR& sformat : formats) {
vk::Format format = sformat.format;
if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) {
continue;
}
surface_format.format = format;
surface_format.colorSpace = sformat.colorSpace;
return;
}
UNREACHABLE_MSG("Unable to find required swapchain format!");
}
void Swapchain::SetSurfaceProperties() {
const vk::SurfaceCapabilitiesKHR capabilities =
instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
extent.width = std::max(capabilities.minImageExtent.width,
std::min(capabilities.maxImageExtent.width, width));
extent.height = std::max(capabilities.minImageExtent.height,
std::min(capabilities.maxImageExtent.height, height));
}
// Select number of images in swap chain, we prefer one buffer in the background to work on
image_count = capabilities.minImageCount + 1;
if (capabilities.maxImageCount > 0) {
image_count = std::min(image_count, capabilities.maxImageCount);
}
// Prefer identity transform if possible
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {
transform = capabilities.currentTransform;
}
// Opaque is not supported everywhere.
composite_alpha = vk::CompositeAlphaFlagBitsKHR::eOpaque;
if (!(capabilities.supportedCompositeAlpha & vk::CompositeAlphaFlagBitsKHR::eOpaque)) {
composite_alpha = vk::CompositeAlphaFlagBitsKHR::eInherit;
}
}
void Swapchain::Destroy() {
vk::Device device = instance.GetDevice();
if (swapchain) {
device.destroySwapchainKHR(swapchain);
}
for (u32 i = 0; i < image_count; i++) {
device.destroySemaphore(image_acquired[i]);
device.destroySemaphore(present_ready[i]);
}
image_acquired.clear();
present_ready.clear();
}
void Swapchain::RefreshSemaphores() {
const vk::Device device = instance.GetDevice();
image_acquired.resize(image_count);
present_ready.resize(image_count);
for (vk::Semaphore& semaphore : image_acquired) {
semaphore = device.createSemaphore({});
}
for (vk::Semaphore& semaphore : present_ready) {
semaphore = device.createSemaphore({});
}
if (instance.HasDebuggingToolAttached()) {
for (u32 i = 0; i < image_count; ++i) {
SetObjectName(device, image_acquired[i], "Swapchain Semaphore: image_acquired {}", i);
SetObjectName(device, present_ready[i], "Swapchain Semaphore: present_ready {}", i);
}
}
}
void Swapchain::SetupImages() {
vk::Device device = instance.GetDevice();
images = device.getSwapchainImagesKHR(swapchain);
image_count = static_cast<u32>(images.size());
if (instance.HasDebuggingToolAttached()) {
for (u32 i = 0; i < image_count; ++i) {
SetObjectName(device, images[i], "Swapchain Image {}", i);
}
}
}
} // namespace Vulkan

View file

@ -0,0 +1,113 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <vector>
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Frontend {
class WindowSDL;
}
namespace Vulkan {
class Instance;
class Scheduler;
class Swapchain {
public:
explicit Swapchain(const Instance& instance, const Frontend::WindowSDL& window);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, vk::SurfaceKHR surface);
/// Acquires the next image in the swapchain.
bool AcquireNextImage();
/// Presents the current image and move to the next one
void Present();
vk::SurfaceKHR GetSurface() const {
return surface;
}
vk::Image Image() const {
return images[image_index];
}
vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
}
vk::SwapchainKHR GetHandle() const {
return swapchain;
}
u32 GetWidth() const {
return width;
}
u32 GetHeight() const {
return height;
}
u32 GetImageCount() const {
return image_count;
}
u32 GetFrameIndex() const {
return frame_index;
}
vk::Extent2D GetExtent() const {
return extent;
}
[[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const {
return image_acquired[frame_index];
}
[[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const {
return present_ready[image_index];
}
private:
/// Selects the best available swapchain image format
void FindPresentFormat();
/// Sets the surface properties according to device capabilities
void SetSurfaceProperties();
/// Destroys current swapchain resources
void Destroy();
/// Performs creation of image views and framebuffers from the swapchain images
void SetupImages();
/// Creates the image acquired and present ready semaphores
void RefreshSemaphores();
private:
const Instance& instance;
vk::SwapchainKHR swapchain{};
vk::SurfaceKHR surface{};
vk::SurfaceFormatKHR surface_format;
vk::Extent2D extent;
vk::SurfaceTransformFlagBitsKHR transform;
vk::CompositeAlphaFlagBitsKHR composite_alpha;
std::vector<vk::Image> images;
std::vector<vk::Semaphore> image_acquired;
std::vector<vk::Semaphore> present_ready;
u32 width = 0;
u32 height = 0;
u32 image_count = 0;
u32 image_index = 0;
u32 frame_index = 0;
bool needs_recreation = true;
};
} // namespace Vulkan

View file

@ -0,0 +1,151 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h"
#include <vk_mem_alloc.h>
namespace VideoCore {
using namespace Vulkan;
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
using Libraries::VideoOut::TilingMode;
[[nodiscard]] vk::Format ConvertPixelFormat(const VideoOutFormat format) {
switch (format) {
case VideoOutFormat::A8R8G8B8Srgb:
return vk::Format::eB8G8R8A8Srgb;
case VideoOutFormat::A8B8G8R8Srgb:
return vk::Format::eA8B8G8R8SrgbPack32;
case VideoOutFormat::A2R10G10B10:
case VideoOutFormat::A2R10G10B10Srgb:
return vk::Format::eA2R10G10B10UnormPack32;
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
[[nodiscard]] vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eSampled;
if (false /*&& IsDepthStencilFormat(format)*/) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
// usage |= vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eStorage;
}
return usage;
}
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
const auto& attrib = group.attrib;
is_tiled = attrib.tiling_mode == TilingMode::Tile;
pixel_format = ConvertPixelFormat(attrib.pixel_format);
type = vk::ImageType::e2D;
size.width = attrib.width;
size.height = attrib.height;
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}
UniqueImage::~UniqueImage() {
if (image) {
vmaDestroyImage(allocator, image, allocation);
}
}
void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
const VmaAllocationCreateInfo alloc_info = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
};
const VkImageCreateInfo image_ci_unsafe = static_cast<VkImageCreateInfo>(image_ci);
VkImage unsafe_image{};
VkResult result = vmaCreateImage(allocator, &image_ci_unsafe, &alloc_info, &unsafe_image,
&allocation, nullptr);
ASSERT_MSG(result == VK_SUCCESS, "Failed allocating image with error {}",
vk::to_string(vk::Result{result}));
image = vk::Image{unsafe_image};
}
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const ImageInfo& info_, VAddr cpu_addr)
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr} {
vk::ImageCreateFlags flags{};
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
info.size.width == info.size.height) {
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
}
if (info.type == vk::ImageType::e3D) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
const vk::ImageCreateInfo image_ci = {
.flags = flags,
.imageType = info.type,
.format = info.pixel_format,
.extent{
.width = info.size.width,
.height = info.size.height,
.depth = info.size.depth,
},
.mipLevels = static_cast<u32>(info.resources.levels),
.arrayLayers = static_cast<u32>(info.resources.layers),
.tiling = vk::ImageTiling::eOptimal,
.usage = ImageUsageFlags(info.pixel_format),
.initialLayout = vk::ImageLayout::eUndefined,
};
image.Create(image_ci);
const vk::Image handle = image;
scheduler->Record([handle](vk::CommandBuffer cmdbuf) {
const vk::ImageMemoryBarrier init_barrier = {
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eNone,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = handle,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
vk::PipelineStageFlagBits::eTopOfPipe,
vk::DependencyFlagBits::eByRegion, {}, {}, init_barrier);
});
const bool is_32bpp = info.pixel_format == vk::Format::eB8G8R8A8Srgb ||
info.pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
ASSERT(info.is_tiled && is_32bpp);
if (Config::isNeoMode()) {
guest_size_bytes = info.pitch * 128 * ((info.size.height + 127) & (~127)) * 4;
} else {
guest_size_bytes = info.pitch * 128 * ((info.size.height + 63) & (~63)) * 4;
}
cpu_addr_end = cpu_addr + guest_size_bytes;
}
Image::~Image() = default;
} // namespace VideoCore

View file

@ -0,0 +1,116 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
namespace Vulkan {
class Instance;
class Scheduler;
} // namespace Vulkan
VK_DEFINE_HANDLE(VmaAllocation)
VK_DEFINE_HANDLE(VmaAllocator)
namespace VideoCore {
enum ImageFlagBits : u32 {
CpuModified = 1 << 2, ///< Contents have been modified from the CPU
GpuModified = 1 << 3, ///< Contents have been modified from the GPU
Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo {
ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
bool is_tiled = false;
vk::Format pixel_format = vk::Format::eUndefined;
vk::ImageType type = vk::ImageType::e1D;
SubresourceExtent resources;
Extent3D size{1, 1, 1};
u32 pitch;
};
struct Handle {
VmaAllocation allocation;
VkImage image;
Handle() = default;
Handle(Handle&& other)
: image{std::exchange(other.image, VK_NULL_HANDLE)},
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
Handle& operator=(Handle&& other) {
image = std::exchange(other.image, VK_NULL_HANDLE);
allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
return *this;
}
};
struct UniqueImage {
explicit UniqueImage(vk::Device device, VmaAllocator allocator);
~UniqueImage();
UniqueImage(const UniqueImage&) = delete;
UniqueImage& operator=(const UniqueImage&) = delete;
UniqueImage(UniqueImage&& other) : image{std::exchange(other.image, VK_NULL_HANDLE)} {}
UniqueImage& operator=(UniqueImage&& other) {
image = std::exchange(other.image, VK_NULL_HANDLE);
return *this;
}
void Create(const vk::ImageCreateInfo& image_ci);
operator vk::Image() const {
return image;
}
private:
vk::Device device;
VmaAllocator allocator;
VmaAllocation allocation;
vk::Image image{};
};
struct Image {
explicit Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageInfo& info, VAddr cpu_addr);
~Image();
Image(const Image&) = delete;
Image& operator=(const Image&) = delete;
Image(Image&&) = default;
Image& operator=(Image&&) = default;
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
ImageInfo info;
UniqueImage image;
vk::ImageAspectFlags aspect_mask;
u32 guest_size_bytes = 0;
size_t channel = 0;
ImageFlagBits flags = ImageFlagBits::CpuModified;
VAddr cpu_addr = 0;
VAddr cpu_addr_end = 0;
u64 modification_tick = 0;
};
} // namespace VideoCore

View file

@ -0,0 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image_view.h"
namespace VideoCore {
[[nodiscard]] vk::ImageViewType ConvertImageViewType(const ImageViewType type) {
switch (type) {
case ImageViewType::e1D:
return vk::ImageViewType::e1D;
case ImageViewType::e2D:
return vk::ImageViewType::e2D;
case ImageViewType::e3D:
return vk::ImageViewType::e3D;
case ImageViewType::Buffer:
break;
default:
break;
}
UNREACHABLE_MSG("Invalid image type={}", static_cast<u32>(type));
return {};
}
[[nodiscard]] vk::Format ConvertPixelFormat(const PixelFormat format) {
switch (format) {
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageViewInfo& info_, vk::Image image)
: info{info_} {
const vk::ImageViewCreateInfo image_view_ci = {
.image = image,
.viewType = ConvertImageViewType(info.type),
.format = ConvertPixelFormat(info.format),
.components{
.r = vk::ComponentSwizzle::eIdentity,
.g = vk::ComponentSwizzle::eIdentity,
.b = vk::ComponentSwizzle::eIdentity,
.a = vk::ComponentSwizzle::eIdentity,
},
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0U,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
image_view = instance.GetDevice().createImageViewUnique(image_view_ci);
}
ImageView::~ImageView() = default;
} // namespace VideoCore

View file

@ -0,0 +1,58 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
namespace Vulkan {
class Instance;
class Scheduler;
} // namespace Vulkan
namespace VideoCore {
enum class ImageViewType : u32 {
e1D,
e2D,
Cube,
e3D,
e1DArray,
e2DArray,
CubeArray,
Buffer,
};
enum class SwizzleSource : u32 {
Zero = 0,
One = 1,
R = 2,
G = 3,
B = 4,
A = 5,
};
struct ImageViewInfo {
ImageViewType type{};
PixelFormat format{};
SubresourceRange range;
u8 x_source = static_cast<u8>(SwizzleSource::R);
u8 y_source = static_cast<u8>(SwizzleSource::G);
u8 z_source = static_cast<u8>(SwizzleSource::B);
u8 w_source = static_cast<u8>(SwizzleSource::A);
};
class ImageView {
explicit ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageViewInfo& info, vk::Image image);
~ImageView();
ImageId image_id{};
Extent3D size{0, 0, 0};
ImageViewInfo info{};
vk::UniqueImageView image_view;
};
} // namespace VideoCore

View file

@ -0,0 +1,176 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bit>
#include <compare>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "common/types.h"
namespace VideoCore {
struct SlotId {
static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
constexpr auto operator<=>(const SlotId&) const noexcept = default;
constexpr explicit operator bool() const noexcept {
return index != INVALID_INDEX;
}
u32 index = INVALID_INDEX;
};
template <class T>
class SlotVector {
constexpr static std::size_t InitialCapacity = 1024;
public:
SlotVector() {
Reserve(InitialCapacity);
}
~SlotVector() noexcept {
std::size_t index = 0;
for (u64 bits : stored_bitset) {
for (std::size_t bit = 0; bits; ++bit, bits >>= 1) {
if ((bits & 1) != 0) {
values[index + bit].object.~T();
}
}
index += 64;
}
delete[] values;
}
[[nodiscard]] T& operator[](SlotId id) noexcept {
ValidateIndex(id);
return values[id.index].object;
}
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
ValidateIndex(id);
return values[id.index].object;
}
template <typename... Args>
[[nodiscard]] SlotId insert(Args&&... args) noexcept {
const u32 index = FreeValueIndex();
new (&values[index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);
return SlotId{index};
}
template <typename... Args>
[[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept {
const u32 index = FreeValueIndex();
T& existing_value = values[existing_id.index].object;
new (&values[index].object) T(std::move(existing_value));
existing_value.~T();
new (&values[existing_id.index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);
return SlotId{index};
}
void erase(SlotId id) noexcept {
values[id.index].object.~T();
free_list.push_back(id.index);
ResetStorageBit(id.index);
}
std::size_t size() const noexcept {
return values_capacity - free_list.size();
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
};
union Entry {
Entry() noexcept : dummy{} {}
~Entry() noexcept {}
NonTrivialDummy dummy;
T object;
};
void SetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] |= u64(1) << (index % 64);
}
void ResetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
}
bool ReadStorageBit(u32 index) noexcept {
return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
}
void ValidateIndex([[maybe_unused]] SlotId id) const noexcept {
DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
}
[[nodiscard]] u32 FreeValueIndex() noexcept {
if (free_list.empty()) {
Reserve(values_capacity ? (values_capacity << 1) : 1);
}
const u32 free_index = free_list.back();
free_list.pop_back();
return free_index;
}
void Reserve(std::size_t new_capacity) noexcept {
Entry* const new_values = new Entry[new_capacity];
std::size_t index = 0;
for (u64 bits : stored_bitset) {
for (std::size_t bit = 0; bits; ++bit, bits >>= 1) {
const std::size_t i = index + bit;
if ((bits & 1) == 0) {
continue;
}
T& old_value = values[i].object;
new (&new_values[i].object) T(std::move(old_value));
old_value.~T();
}
index += 64;
}
stored_bitset.resize((new_capacity + 63) / 64);
const std::size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(),
static_cast<u32>(values_capacity));
delete[] values;
values = new_values;
values_capacity = new_capacity;
}
Entry* values = nullptr;
std::size_t values_capacity = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
};
} // namespace VideoCore
template <>
struct std::hash<VideoCore::SlotId> {
std::size_t operator()(const VideoCore::SlotId& id) const noexcept {
return std::hash<u32>{}(id.index);
}
};

View file

@ -0,0 +1,210 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/tile_manager.h"
#ifndef _WIN64
#include <signal.h>
#include <sys/mman.h>
#endif
namespace VideoCore {
static TextureCache* g_texture_cache = nullptr;
#ifndef _WIN64
void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
if (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) {
g_texture_cache->OnCpuWrite(address);
} else {
// Read not supported!
UNREACHABLE();
}
}
#endif
static constexpr u64 StreamBufferSize = 128_MB;
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
: instance{instance_}, scheduler{scheduler_}, staging{instance, scheduler,
vk::BufferUsageFlagBits::eTransferSrc,
StreamBufferSize,
Vulkan::BufferType::Upload} {
#ifndef _WIN64
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, SIGSEGV);
using HandlerType = decltype(sigaction::sa_sigaction);
struct sigaction guest_access_fault {};
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
guest_access_fault.sa_mask = signal_mask;
sigaction(SIGSEGV, &guest_access_fault, nullptr);
#endif
g_texture_cache = this;
}
TextureCache::~TextureCache() = default;
void TextureCache::OnCpuWrite(VAddr address) {
const VAddr address_aligned = address & ~((1 << PageBits) - 1);
ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
UntrackImage(image, image_id);
});
}
Image& TextureCache::FindDisplayBuffer(const Libraries::VideoOut::BufferAttributeGroup& group,
VAddr cpu_address) {
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, group.size_in_bytes, [&](ImageId image_id, Image& image) {
if (image.cpu_addr == cpu_address) {
image_ids.push_back(image_id);
}
});
ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!");
ImageId image_id{};
if (image_ids.empty()) {
image_id = slot_images.insert(instance, scheduler, ImageInfo{group}, cpu_address);
RegisterImage(image_id);
} else {
image_id = image_ids[0];
}
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified)) {
RefreshImage(image);
TrackImage(image, image_id);
}
return image;
}
void TextureCache::RefreshImage(Image& image) {
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
// Upload data to the staging buffer.
const auto [data, offset, _] = staging.Map(image.guest_size_bytes, 0);
ConvertTileToLinear(data, reinterpret_cast<const u8*>(image.cpu_addr), image.info.size.width,
image.info.size.height, Config::isNeoMode());
staging.Commit(image.guest_size_bytes);
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {image.info.size.width, image.info.size.height, 1},
};
const vk::Buffer src_buffer = staging.Handle();
const vk::Image dst_image = image.image;
scheduler.Record([src_buffer, dst_image, image_copy](vk::CommandBuffer cmdbuf) {
cmdbuf.copyBufferToImage(src_buffer, dst_image, vk::ImageLayout::eGeneral, image_copy);
});
}
void TextureCache::RegisterImage(ImageId image_id) {
Image& image = slot_images[image_id];
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
"Trying to register an already registered image");
image.flags |= ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
}
void TextureCache::UnregisterImage(ImageId image_id) {
Image& image = slot_images[image_id];
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
return;
}
auto& image_ids = page_it.value();
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits);
return;
}
image_ids.erase(vector_it);
});
slot_images.erase(image_id);
}
void TextureCache::TrackImage(Image& image, ImageId image_id) {
if (True(image.flags & ImageFlagBits::Tracked)) {
return;
}
image.flags |= ImageFlagBits::Tracked;
UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
}
void TextureCache::UntrackImage(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::Tracked)) {
return;
}
image.flags &= ~ImageFlagBits::Tracked;
UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
}
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
const u64 num_pages = ((addr + size - 1) >> PageBits) - (addr >> PageBits) + 1;
const u64 page_start = addr >> PageBits;
const u64 page_end = page_start + num_pages;
const auto pages_interval =
decltype(cached_pages)::interval_type::right_open(page_start, page_end);
if (delta > 0) {
cached_pages.add({pages_interval, delta});
}
const auto& range = cached_pages.equal_range(pages_interval);
for (const auto& [range, count] : boost::make_iterator_range(range)) {
const auto interval = range & pages_interval;
const VAddr interval_start_addr = boost::icl::first(interval) << PageBits;
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageBits;
const u32 interval_size = interval_end_addr - interval_start_addr;
#ifndef _WIN64
void* addr = reinterpret_cast<void*>(interval_start_addr);
if (delta > 0 && count == delta) {
mprotect(addr, interval_size, PROT_NONE);
} else if (delta < 0 && count == -delta) {
mprotect(addr, interval_size, PROT_READ | PROT_WRITE);
} else {
ASSERT(count >= 0);
}
#endif
}
if (delta < 0) {
cached_pages.add({pages_interval, delta});
}
}
} // namespace VideoCore

View file

@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <forward_list>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/slot_vector.h"
namespace Core::Libraries::VideoOut {
struct BufferAttributeGroup;
}
namespace VideoCore {
class TextureCache {
static constexpr u64 PageBits = 14;
public:
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
~TextureCache();
/// Invalidates any image in the logical page range.
void OnCpuWrite(VAddr address);
/// Retrieves the image handle of the image with the provided attributes and address.
Image& FindDisplayBuffer(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address);
private:
/// Iterate over all page indices in a range
template <typename Func>
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PageBits;
for (u64 page = addr >> PageBits; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 32> images;
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if constexpr (BOOL_BREAK) {
return false;
} else {
return;
}
}
for (const ImageId image_id : it->second) {
Image& image = slot_images[image_id];
if (image.flags & ImageFlagBits::Picked) {
continue;
}
image.flags |= ImageFlagBits::Picked;
images.push_back(image_id);
if constexpr (BOOL_BREAK) {
if (func(image_id, image)) {
return true;
}
} else {
func(image_id, image);
}
}
if constexpr (BOOL_BREAK) {
return false;
}
});
for (const ImageId image_id : images) {
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
}
}
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);
/// Reuploads image contents.
void RefreshImage(Image& image);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(Image& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(Image& image, ImageId image_id);
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta);
private:
const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler;
Vulkan::StreamBuffer staging;
SlotVector<Image> slot_images;
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
boost::icl::interval_map<VAddr, s32> cached_pages;
};
} // namespace VideoCore

View file

@ -0,0 +1,164 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "video_core/texture_cache/tile_manager.h"
namespace VideoCore {
static u32 IntLog2(u32 i) {
return 31 - __builtin_clz(i | 1u);
}
class TileManager32 {
public:
u32 m_macro_tile_height = 0;
u32 m_bank_height = 0;
u32 m_num_banks = 0;
u32 m_num_pipes = 0;
u32 m_padded_width = 0;
u32 m_padded_height = 0;
u32 m_pipe_bits = 0;
u32 m_bank_bits = 0;
void Init(u32 width, u32 height, bool is_neo) {
m_macro_tile_height = (is_neo ? 128 : 64);
m_bank_height = is_neo ? 2 : 1;
m_num_banks = is_neo ? 8 : 16;
m_num_pipes = is_neo ? 16 : 8;
m_padded_width = width;
if (height == 1080) {
m_padded_height = is_neo ? 1152 : 1088;
}
if (height == 720) {
m_padded_height = 768;
}
m_pipe_bits = is_neo ? 4 : 3;
m_bank_bits = is_neo ? 3 : 4;
}
static u32 getElementIdx(u32 x, u32 y) {
u32 elem = 0;
elem |= ((x >> 0u) & 0x1u) << 0u;
elem |= ((x >> 1u) & 0x1u) << 1u;
elem |= ((y >> 0u) & 0x1u) << 2u;
elem |= ((x >> 2u) & 0x1u) << 3u;
elem |= ((y >> 1u) & 0x1u) << 4u;
elem |= ((y >> 2u) & 0x1u) << 5u;
return elem;
}
static u32 getPipeIdx(u32 x, u32 y, bool is_neo) {
u32 pipe = 0;
if (!is_neo) {
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
} else {
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
}
return pipe;
}
static u32 getBankIdx(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks,
u32 num_pipes) {
const u32 x_shift_offset = IntLog2(bank_width * num_pipes);
const u32 y_shift_offset = IntLog2(bank_height);
const u32 xs = x >> x_shift_offset;
const u32 ys = y >> y_shift_offset;
u32 bank = 0;
switch (num_banks) {
case 8:
bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
break;
case 16:
bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
break;
default:;
}
return bank;
}
u64 getTiledOffs(u32 x, u32 y, bool is_neo) const {
u64 element_index = getElementIdx(x, y);
u32 xh = x;
u32 yh = y;
u64 pipe = getPipeIdx(xh, yh, is_neo);
u64 bank = getBankIdx(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);
u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
u64 element_offset = (element_index * 32);
u64 tile_split_slice = 0;
if (tile_bytes > 512) {
tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
element_offset %= (static_cast<u64>(512) * 8);
tile_bytes = 512;
}
u64 macro_tile_bytes =
(128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
u64 macro_tiles_per_row = m_padded_width / 128;
u64 macro_tile_row_index = y / m_macro_tile_height;
u64 macro_tile_column_index = x / 128;
u64 macro_tile_index =
(macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
u64 slice_offset = tile_split_slice * slice_bytes;
u64 tile_row_index = (y / 8) % m_bank_height;
u64 tile_index = tile_row_index;
u64 tile_offset = tile_index * tile_bytes;
u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
bank ^= tile_split_slice_rotation;
bank &= (m_num_banks - 1);
u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
u64 bit_offset = total_offset & 0x7u;
total_offset /= 8;
u64 pipe_interleave_offset = total_offset & 0xffu;
u64 offset = total_offset >> 8u;
u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) |
(offset << (8u + m_pipe_bits + m_bank_bits));
return ((byte_offset << 3u) | bit_offset) / 8;
}
};
void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_neo) {
TileManager32 t;
t.Init(width, height, is_neo);
for (u32 y = 0; y < height; y++) {
u32 x = 0;
u64 linear_offset = y * width * 4;
for (; x + 1 < width; x += 2) {
auto tiled_offset = t.getTiledOffs(x, y, is_neo);
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
linear_offset += 8;
}
if (x < width) {
auto tiled_offset = t.getTiledOffs(x, y, is_neo);
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
}
}
}
} // namespace VideoCore

View file

@ -0,0 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace VideoCore {
/// Converts tiled texture data to linear format.
void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool neo);
} // namespace VideoCore

View file

@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "video_core/texture_cache/slot_vector.h"
namespace VideoCore {
using ImageId = SlotId;
using ImageViewId = SlotId;
struct Offset2D {
s32 x;
s32 y;
};
struct Offset3D {
s32 x;
s32 y;
s32 z;
};
struct Region2D {
Offset2D start;
Offset2D end;
};
struct Extent2D {
u32 width;
u32 height;
};
struct Extent3D {
u32 width;
u32 height;
u32 depth;
};
struct SubresourceLayers {
s32 base_level = 0;
s32 base_layer = 0;
s32 num_layers = 1;
};
struct SubresourceBase {
s32 level = 0;
s32 layer = 0;
};
struct SubresourceExtent {
s32 levels = 1;
s32 layers = 1;
};
struct SubresourceRange {
SubresourceBase base;
SubresourceExtent extent;
};
struct ImageCopy {
SubresourceLayers src_subresource;
SubresourceLayers dst_subresource;
Offset3D src_offset;
Offset3D dst_offset;
Extent3D extent;
};
struct BufferImageCopy {
std::size_t buffer_offset;
std::size_t buffer_size;
u32 buffer_row_length;
u32 buffer_image_height;
SubresourceLayers image_subresource;
Offset3D image_offset;
Extent3D image_extent;
};
struct BufferCopy {
u64 src_offset;
u64 dst_offset;
std::size_t size;
};
} // namespace VideoCore