video_core: Rewrite vulkan and videoout

This commit is contained in:
GPUCode 2024-04-14 17:09:51 +03:00
parent 0a94899c86
commit c01b6f8397
89 changed files with 5378 additions and 2150 deletions

View file

@ -0,0 +1,151 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h"
#include <vk_mem_alloc.h>
namespace VideoCore {
using namespace Vulkan;
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
using Libraries::VideoOut::TilingMode;
/// Maps a VideoOut pixel format to the Vulkan format used for the backing image.
/// Both 10-bit variants (plain and Srgb) resolve to the same UNORM packed format.
/// Any other value trips UNREACHABLE_MSG.
[[nodiscard]] vk::Format ConvertPixelFormat(const VideoOutFormat format) {
    if (format == VideoOutFormat::A8R8G8B8Srgb) {
        return vk::Format::eB8G8R8A8Srgb;
    }
    if (format == VideoOutFormat::A8B8G8R8Srgb) {
        return vk::Format::eA8B8G8R8SrgbPack32;
    }
    const bool is_10bit = format == VideoOutFormat::A2R10G10B10 ||
                          format == VideoOutFormat::A2R10G10B10Srgb;
    if (is_10bit) {
        return vk::Format::eA2R10G10B10UnormPack32;
    }
    UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
    return {};
}
/// Returns the usage flags every cached image is created with.
/// `format` is currently unused: depth/stencil detection is stubbed out, so the result is
/// always transfer-source | transfer-destination | sampled.
[[nodiscard]] vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
    // Placeholder for IsDepthStencilFormat(format), which is not wired up yet.
    constexpr bool is_depth_stencil = false;

    vk::ImageUsageFlags usage{vk::ImageUsageFlagBits::eTransferSrc};
    usage |= vk::ImageUsageFlagBits::eTransferDst;
    usage |= vk::ImageUsageFlagBits::eSampled;
    if (is_depth_stencil) {
        usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
    }
    // Color images would additionally get:
    // usage |= vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eStorage;
    return usage;
}
/// Builds the description of a 2D display buffer from a VideoOut attribute group.
/// Note that `pitch` has different units depending on tiling: for linear buffers it is the
/// width in pixels, otherwise it is the width rounded up to whole 128-pixel units.
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
    const auto& attributes = group.attrib;
    const bool linear = attributes.tiling_mode == TilingMode::Linear;

    is_tiled = attributes.tiling_mode == TilingMode::Tile;
    pixel_format = ConvertPixelFormat(attributes.pixel_format);
    type = vk::ImageType::e2D;
    size.width = attributes.width;
    size.height = attributes.height;
    if (linear) {
        pitch = size.width;
    } else {
        pitch = (size.width + 127) >> 7;
    }
}
/// Stores the device and allocator handles used later by Create() and the destructor.
/// No image is created here; `image` stays null until Create() is called.
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}
/// Releases the image and its allocation through VMA, if an image was ever created.
UniqueImage::~UniqueImage() {
    if (!image) {
        return;
    }
    vmaDestroyImage(allocator, image, allocation);
}
/// Creates the Vulkan image and its backing allocation through VMA.
/// @param image_ci Creation parameters of the image.
/// Asserts if VMA reports anything other than VK_SUCCESS.
void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
    // Every field not set below stays zero/null, exactly like an aggregate initializer would.
    VmaAllocationCreateInfo allocation_ci{};
    allocation_ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
    allocation_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
    allocation_ci.requiredFlags = 0;
    allocation_ci.preferredFlags = 0;
    allocation_ci.pool = VK_NULL_HANDLE;
    allocation_ci.pUserData = nullptr;

    // VMA speaks the C API, so hand it the plain C structures.
    const VkImageCreateInfo raw_image_ci = static_cast<VkImageCreateInfo>(image_ci);
    VkImage raw_image{};
    const VkResult status =
        vmaCreateImage(allocator, &raw_image_ci, &allocation_ci, &raw_image, &allocation, nullptr);
    ASSERT_MSG(status == VK_SUCCESS, "Failed allocating image with error {}",
               vk::to_string(vk::Result{status}));
    image = vk::Image{raw_image};
}
/// Creates the Vulkan image described by `info_`, records its initial layout transition on the
/// scheduler and computes the guest memory range [cpu_addr, cpu_addr_end) that backs it.
/// @param instance_  Vulkan instance wrapper providing the device and allocator.
/// @param scheduler_ Scheduler the initial barrier is recorded on.
/// @param info_      Description of the image (format, size, pitch, tiling).
/// @param cpu_addr   Guest address of the first byte of the image.
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const ImageInfo& info_, VAddr cpu_addr)
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr} {
vk::ImageCreateFlags flags{};
// A square 2D image with at least six layers is flagged as cube compatible.
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
info.size.width == info.size.height) {
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
}
// 3D images are flagged so 2D array views can be created from them.
if (info.type == vk::ImageType::e3D) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
const vk::ImageCreateInfo image_ci = {
.flags = flags,
.imageType = info.type,
.format = info.pixel_format,
.extent{
.width = info.size.width,
.height = info.size.height,
.depth = info.size.depth,
},
.mipLevels = static_cast<u32>(info.resources.levels),
.arrayLayers = static_cast<u32>(info.resources.layers),
.tiling = vk::ImageTiling::eOptimal,
.usage = ImageUsageFlags(info.pixel_format),
.initialLayout = vk::ImageLayout::eUndefined,
};
image.Create(image_ci);
// Copy the raw handle so the recorded lambda does not reference this object.
const vk::Image handle = image;
// Move the whole image (all mips and layers) from the undefined layout to eGeneral.
scheduler->Record([handle](vk::CommandBuffer cmdbuf) {
const vk::ImageMemoryBarrier init_barrier = {
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eNone,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = handle,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
vk::PipelineStageFlagBits::eTopOfPipe,
vk::DependencyFlagBits::eByRegion, {}, {}, init_barrier);
});
// Only tiled images with one of the two 32-bit sRGB formats are accepted here.
const bool is_32bpp = info.pixel_format == vk::Format::eB8G8R8A8Srgb ||
info.pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
ASSERT(info.is_tiled && is_32bpp);
// Guest footprint: pitch * 128 pixels per row, height rounded up to 128 rows in Neo mode and
// 64 rows otherwise, 4 bytes per pixel.
if (Config::isNeoMode()) {
guest_size_bytes = info.pitch * 128 * ((info.size.height + 127) & (~127)) * 4;
} else {
guest_size_bytes = info.pitch * 128 * ((info.size.height + 63) & (~63)) * 4;
}
cpu_addr_end = cpu_addr + guest_size_bytes;
}
/// Nothing to do explicitly; the UniqueImage member releases the Vulkan image.
Image::~Image() = default;
} // namespace VideoCore

View file

@ -0,0 +1,116 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
namespace Vulkan {
class Instance;
class Scheduler;
} // namespace Vulkan
VK_DEFINE_HANDLE(VmaAllocation)
VK_DEFINE_HANDLE(VmaAllocator)
namespace VideoCore {
/// State bits stored in Image::flags. The values are individual bits so several states can be
/// combined; the bitwise operators used on them come from the macro invoked right below.
enum ImageFlagBits : u32 {
CpuModified = 1 << 2, ///< Contents have been modified from the CPU
GpuModified = 1 << 3, ///< Contents have been modified from the GPU
Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
/// Plain description of an image: format, dimensionality, subresource counts and size.
struct ImageInfo {
    ImageInfo() = default;
    /// Fills the description from a VideoOut display buffer attribute group.
    explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;

    bool is_tiled = false;                           ///< True when the guest data is tiled
    vk::Format pixel_format = vk::Format::eUndefined; ///< Vulkan format of the image
    vk::ImageType type = vk::ImageType::e1D;         ///< Dimensionality of the image
    SubresourceExtent resources;                     ///< Mip level and array layer counts
    Extent3D size{1, 1, 1};                          ///< Size in texels
    /// Row pitch. Zero-initialized so a default-constructed ImageInfo never carries an
    /// indeterminate value.
    u32 pitch = 0;
};
struct Handle {
VmaAllocation allocation;
VkImage image;
Handle() = default;
Handle(Handle&& other)
: image{std::exchange(other.image, VK_NULL_HANDLE)},
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
Handle& operator=(Handle&& other) {
image = std::exchange(other.image, VK_NULL_HANDLE);
allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
return *this;
}
};
struct UniqueImage {
explicit UniqueImage(vk::Device device, VmaAllocator allocator);
~UniqueImage();
UniqueImage(const UniqueImage&) = delete;
UniqueImage& operator=(const UniqueImage&) = delete;
UniqueImage(UniqueImage&& other) : image{std::exchange(other.image, VK_NULL_HANDLE)} {}
UniqueImage& operator=(UniqueImage&& other) {
image = std::exchange(other.image, VK_NULL_HANDLE);
return *this;
}
void Create(const vk::ImageCreateInfo& image_ci);
operator vk::Image() const {
return image;
}
private:
vk::Device device;
VmaAllocator allocator;
VmaAllocation allocation;
vk::Image image{};
};
/// A cached guest image: the Vulkan image, its description and the guest address range it
/// mirrors. Move-only.
struct Image {
/// Creates the Vulkan image and computes the guest range starting at `cpu_addr`.
explicit Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageInfo& info, VAddr cpu_addr);
~Image();
Image(const Image&) = delete;
Image& operator=(const Image&) = delete;
Image(Image&&) = default;
Image& operator=(Image&&) = default;
/// Returns true when [overlap_cpu_addr, overlap_cpu_addr + overlap_size) intersects the
/// half-open guest range [cpu_addr, cpu_addr_end) of this image.
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
ImageInfo info;
UniqueImage image;
vk::ImageAspectFlags aspect_mask;
// Size in bytes of the guest memory backing the image.
u32 guest_size_bytes = 0;
size_t channel = 0;
// Images start out CPU-modified, i.e. their contents have not been uploaded yet.
ImageFlagBits flags = ImageFlagBits::CpuModified;
VAddr cpu_addr = 0;
// One past the last guest byte (cpu_addr + guest_size_bytes).
VAddr cpu_addr_end = 0;
u64 modification_tick = 0;
};
} // namespace VideoCore

View file

@ -0,0 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image_view.h"
namespace VideoCore {
/// Maps the cache's ImageViewType to the corresponding Vulkan image view type.
/// Every image-backed enumerator has a one-to-one Vulkan counterpart. `Buffer` has no image
/// view equivalent and, like any unknown value, trips UNREACHABLE_MSG.
[[nodiscard]] vk::ImageViewType ConvertImageViewType(const ImageViewType type) {
    switch (type) {
    case ImageViewType::e1D:
        return vk::ImageViewType::e1D;
    case ImageViewType::e2D:
        return vk::ImageViewType::e2D;
    case ImageViewType::Cube:
        return vk::ImageViewType::eCube;
    case ImageViewType::e3D:
        return vk::ImageViewType::e3D;
    case ImageViewType::e1DArray:
        return vk::ImageViewType::e1DArray;
    case ImageViewType::e2DArray:
        return vk::ImageViewType::e2DArray;
    case ImageViewType::CubeArray:
        return vk::ImageViewType::eCubeArray;
    case ImageViewType::Buffer:
        // Buffer views are not image views; fall through to the error below.
        break;
    default:
        break;
    }
    UNREACHABLE_MSG("Invalid image type={}", static_cast<u32>(type));
    return {};
}
/// Translates a PixelFormat into a Vulkan format.
/// NOTE(review): the switch has no cases yet, so every input currently reaches
/// UNREACHABLE_MSG. This looks like a placeholder to be filled in as formats are supported.
[[nodiscard]] vk::Format ConvertPixelFormat(const PixelFormat format) {
switch (format) {
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
/// Creates a color image view over `image` using the type and format from `info_`.
/// The view uses identity swizzles, covers mip level 0 only and spans all remaining array
/// layers. `scheduler` is not used.
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                     const ImageViewInfo& info_, vk::Image image)
    : info{info_} {
    const vk::ImageViewType view_type = ConvertImageViewType(info.type);
    const vk::Format view_format = ConvertPixelFormat(info.format);

    const vk::ComponentMapping identity_swizzle = {
        .r = vk::ComponentSwizzle::eIdentity,
        .g = vk::ComponentSwizzle::eIdentity,
        .b = vk::ComponentSwizzle::eIdentity,
        .a = vk::ComponentSwizzle::eIdentity,
    };
    const vk::ImageSubresourceRange view_range = {
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .baseMipLevel = 0U,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = VK_REMAINING_ARRAY_LAYERS,
    };
    const vk::ImageViewCreateInfo create_info = {
        .image = image,
        .viewType = view_type,
        .format = view_format,
        .components = identity_swizzle,
        .subresourceRange = view_range,
    };
    image_view = instance.GetDevice().createImageViewUnique(create_info);
}

/// The unique handle member destroys the view.
ImageView::~ImageView() = default;
} // namespace VideoCore

View file

@ -0,0 +1,58 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
namespace Vulkan {
class Instance;
class Scheduler;
} // namespace Vulkan
namespace VideoCore {
/// Kinds of views that can be requested over an image. Enumerators take consecutive values
/// starting at 0 in the order listed.
enum class ImageViewType : u32 {
e1D,
e2D,
Cube,
e3D,
e1DArray,
e2DArray,
CubeArray,
Buffer,
};
/// Source selected for one output component of a view: a constant (Zero/One) or one of the
/// four channels.
enum class SwizzleSource : u32 {
Zero = 0,
One = 1,
R = 2,
G = 3,
B = 4,
A = 5,
};
/// Parameters describing an image view: its type, format, subresource range and the swizzle
/// source of each component. The swizzle defaults to the identity order R, G, B, A and is
/// stored as the u8 value of SwizzleSource.
struct ImageViewInfo {
ImageViewType type{};
PixelFormat format{};
SubresourceRange range;
u8 x_source = static_cast<u8>(SwizzleSource::R);
u8 y_source = static_cast<u8>(SwizzleSource::G);
u8 z_source = static_cast<u8>(SwizzleSource::B);
u8 w_source = static_cast<u8>(SwizzleSource::A);
};
/// Owns a Vulkan image view together with the parameters it was created from. Move-only.
class ImageView {
public:
    /// Creates the view over `image` on the device provided by `instance`.
    explicit ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                       const ImageViewInfo& info, vk::Image image);
    ~ImageView();

    // The unique view handle cannot be copied; spell the full set out because the destructor
    // is user-declared, which would otherwise suppress the implicit move operations.
    ImageView(const ImageView&) = delete;
    ImageView& operator=(const ImageView&) = delete;
    ImageView(ImageView&&) = default;
    ImageView& operator=(ImageView&&) = default;

    ImageId image_id{};             ///< Id of the image this view refers to
    Extent3D size{0, 0, 0};         ///< Size associated with the view
    ImageViewInfo info{};           ///< Parameters the view was created with
    vk::UniqueImageView image_view; ///< Owned Vulkan handle
};
} // namespace VideoCore

View file

@ -0,0 +1,176 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bit>
#include <compare>
#include <limits>
#include <new>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "common/types.h"
namespace VideoCore {
/// Index of an element inside a SlotVector. A default-constructed id holds INVALID_INDEX and
/// converts to false; any other index converts to true.
struct SlotId {
// Sentinel marking "no slot": the largest u32 value.
static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
// Ids compare by their index.
constexpr auto operator<=>(const SlotId&) const noexcept = default;
constexpr explicit operator bool() const noexcept {
return index != INVALID_INDEX;
}
u32 index = INVALID_INDEX;
};
template <class T>
class SlotVector {
constexpr static std::size_t InitialCapacity = 1024;
public:
SlotVector() {
Reserve(InitialCapacity);
}
~SlotVector() noexcept {
std::size_t index = 0;
for (u64 bits : stored_bitset) {
for (std::size_t bit = 0; bits; ++bit, bits >>= 1) {
if ((bits & 1) != 0) {
values[index + bit].object.~T();
}
}
index += 64;
}
delete[] values;
}
[[nodiscard]] T& operator[](SlotId id) noexcept {
ValidateIndex(id);
return values[id.index].object;
}
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
ValidateIndex(id);
return values[id.index].object;
}
template <typename... Args>
[[nodiscard]] SlotId insert(Args&&... args) noexcept {
const u32 index = FreeValueIndex();
new (&values[index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);
return SlotId{index};
}
template <typename... Args>
[[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept {
const u32 index = FreeValueIndex();
T& existing_value = values[existing_id.index].object;
new (&values[index].object) T(std::move(existing_value));
existing_value.~T();
new (&values[existing_id.index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);
return SlotId{index};
}
void erase(SlotId id) noexcept {
values[id.index].object.~T();
free_list.push_back(id.index);
ResetStorageBit(id.index);
}
std::size_t size() const noexcept {
return values_capacity - free_list.size();
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
};
union Entry {
Entry() noexcept : dummy{} {}
~Entry() noexcept {}
NonTrivialDummy dummy;
T object;
};
void SetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] |= u64(1) << (index % 64);
}
void ResetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
}
bool ReadStorageBit(u32 index) noexcept {
return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
}
void ValidateIndex([[maybe_unused]] SlotId id) const noexcept {
DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
}
[[nodiscard]] u32 FreeValueIndex() noexcept {
if (free_list.empty()) {
Reserve(values_capacity ? (values_capacity << 1) : 1);
}
const u32 free_index = free_list.back();
free_list.pop_back();
return free_index;
}
void Reserve(std::size_t new_capacity) noexcept {
Entry* const new_values = new Entry[new_capacity];
std::size_t index = 0;
for (u64 bits : stored_bitset) {
for (std::size_t bit = 0; bits; ++bit, bits >>= 1) {
const std::size_t i = index + bit;
if ((bits & 1) == 0) {
continue;
}
T& old_value = values[i].object;
new (&new_values[i].object) T(std::move(old_value));
old_value.~T();
}
index += 64;
}
stored_bitset.resize((new_capacity + 63) / 64);
const std::size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(),
static_cast<u32>(values_capacity));
delete[] values;
values = new_values;
values_capacity = new_capacity;
}
Entry* values = nullptr;
std::size_t values_capacity = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
};
} // namespace VideoCore
/// Lets SlotId be used as a key in hashed containers; the hash is that of the raw index.
template <>
struct std::hash<VideoCore::SlotId> {
    std::size_t operator()(const VideoCore::SlotId& id) const noexcept {
        const std::hash<u32> index_hasher{};
        return index_hasher(id.index);
    }
};

View file

@ -0,0 +1,210 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/tile_manager.h"
#ifndef _WIN64
#include <signal.h>
#include <sys/mman.h>
#endif
namespace VideoCore {
static TextureCache* g_texture_cache = nullptr;
#ifndef _WIN64
void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
if (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) {
g_texture_cache->OnCpuWrite(address);
} else {
// Read not supported!
UNREACHABLE();
}
}
#endif
static constexpr u64 StreamBufferSize = 128_MB;
/// Creates the staging upload buffer and, on non-Windows hosts, installs the SIGSEGV handler
/// that reports guest writes to this cache.
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
    : instance{instance_}, scheduler{scheduler_},
      staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize,
              Vulkan::BufferType::Upload} {
    // Publish the cache before the handler can run, so a fault arriving right after
    // installation never dereferences a null pointer.
    g_texture_cache = this;

#ifndef _WIN64
    sigset_t signal_mask;
    sigemptyset(&signal_mask);
    sigaddset(&signal_mask, SIGSEGV);

    struct sigaction guest_access_fault {};
    guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
    guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
    guest_access_fault.sa_mask = signal_mask;

    // Without the handler the first protected page access would kill the process with no
    // diagnostics, so fail loudly here instead.
    [[maybe_unused]] const int result = sigaction(SIGSEGV, &guest_access_fault, nullptr);
    ASSERT_MSG(result == 0, "Failed to install the guest fault signal handler");
#endif
}

TextureCache::~TextureCache() = default;
/// Handles a guest CPU write at `address`: every image registered on the page containing it
/// is marked CPU-modified and untracked.
void TextureCache::OnCpuWrite(VAddr address) {
    constexpr VAddr page_size = VAddr{1} << PageBits;
    const VAddr page_base = address & ~(page_size - 1);

    ForEachImageInRegion(page_base, page_size, [this](ImageId id, Image& touched) {
        // Ensure image is reuploaded when accessed again.
        touched.flags |= ImageFlagBits::CpuModified;
        // Untrack image, so the range is unprotected and the guest can write freely.
        UntrackImage(touched, id);
    });
}
/// Returns the cached image for the display buffer at `cpu_address`, creating and registering
/// it when none exists. If the image is flagged CPU-modified it is re-uploaded and tracked
/// again before being returned.
Image& TextureCache::FindDisplayBuffer(const Libraries::VideoOut::BufferAttributeGroup& group,
                                       VAddr cpu_address) {
    // Collect the registered images that start exactly at the requested address.
    boost::container::small_vector<ImageId, 2> matches;
    ForEachImageInRegion(cpu_address, group.size_in_bytes,
                         [&matches, cpu_address](ImageId candidate_id, Image& candidate) {
                             if (candidate.cpu_addr != cpu_address) {
                                 return;
                             }
                             matches.push_back(candidate_id);
                         });
    ASSERT_MSG(matches.size() <= 1, "Overlapping framebuffers not allowed!");

    ImageId image_id{};
    if (!matches.empty()) {
        image_id = matches.front();
    } else {
        image_id = slot_images.insert(instance, scheduler, ImageInfo{group}, cpu_address);
        RegisterImage(image_id);
    }

    Image& image = slot_images[image_id];
    const bool needs_upload = True(image.flags & ImageFlagBits::CpuModified);
    if (needs_upload) {
        RefreshImage(image);
        TrackImage(image, image_id);
    }
    return image;
}
/// Re-uploads the guest contents of `image`: the tiled guest data is converted to linear
/// layout inside the staging buffer and a buffer-to-image copy is recorded on the scheduler.
void TextureCache::RefreshImage(Image& image) {
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
// Upload data to the staging buffer.
// Map() yields the write pointer and the offset of the mapped region inside the buffer;
// the third value is unused here.
const auto [data, offset, _] = staging.Map(image.guest_size_bytes, 0);
ConvertTileToLinear(data, reinterpret_cast<const u8*>(image.cpu_addr), image.info.size.width,
image.info.size.height, Config::isNeoMode());
staging.Commit(image.guest_size_bytes);
// Copy to the image.
// Row length and image height of 0 mean the buffer data is tightly packed to imageExtent.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {image.info.size.width, image.info.size.height, 1},
};
// Capture plain handles by value so the recorded lambda does not reference `image`.
const vk::Buffer src_buffer = staging.Handle();
const vk::Image dst_image = image.image;
scheduler.Record([src_buffer, dst_image, image_copy](vk::CommandBuffer cmdbuf) {
cmdbuf.copyBufferToImage(src_buffer, dst_image, vk::ImageLayout::eGeneral, image_copy);
});
}
/// Adds the image to the page table entry of every page its guest range touches and flags it
/// as registered. Asserts if the image is already registered.
void TextureCache::RegisterImage(ImageId image_id) {
    Image& target = slot_images[image_id];
    const bool not_registered_yet = False(target.flags & ImageFlagBits::Registered);
    ASSERT_MSG(not_registered_yet, "Trying to register an already registered image");

    target.flags |= ImageFlagBits::Registered;
    ForEachPage(target.cpu_addr, target.guest_size_bytes, [this, image_id](u64 page_index) {
        auto& ids_on_page = page_table[page_index];
        ids_on_page.push_back(image_id);
    });
}
/// Removes the image from every page table entry its guest range touches, clears the
/// registered flag and erases the image from the slot vector.
/// Asserts if the image is not registered or if a page entry is missing.
void TextureCache::UnregisterImage(ImageId image_id) {
    Image& image = slot_images[image_id];
    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
               "Trying to unregister an already unregistered image");
    image.flags &= ~ImageFlagBits::Registered;
    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
        const auto page_it = page_table.find(page);
        if (page_it == page_table.end()) {
            ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
            return;
        }
        auto& image_ids = page_it.value();
        const auto vector_it = std::ranges::find(image_ids, image_id);
        if (vector_it == image_ids.end()) {
            ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits);
            return;
        }
        image_ids.erase(vector_it);
    });
    // `image` dangles past this point.
    slot_images.erase(image_id);
}
/// Starts tracking the guest range of `image` unless it is already tracked.
/// `image_id` is not used.
void TextureCache::TrackImage(Image& image, ImageId image_id) {
    const bool already_tracked = True(image.flags & ImageFlagBits::Tracked);
    if (!already_tracked) {
        image.flags |= ImageFlagBits::Tracked;
        UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
    }
}
/// Stops tracking the guest range of `image` if it is currently tracked.
/// `image_id` is not used.
void TextureCache::UntrackImage(Image& image, ImageId image_id) {
    const bool currently_tracked = True(image.flags & ImageFlagBits::Tracked);
    if (currently_tracked) {
        image.flags &= ~ImageFlagBits::Tracked;
        UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
    }
}
/// Adjusts the per-page tracking counter of every page touched by [addr, addr + size) by
/// `delta` and, on non-Windows hosts, changes the page protection of the sub-ranges whose
/// counter goes from zero to non-zero (access removed) or from non-zero to zero (read/write
/// restored). The results of mprotect are not checked.
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
const u64 num_pages = ((addr + size - 1) >> PageBits) - (addr >> PageBits) + 1;
const u64 page_start = addr >> PageBits;
const u64 page_end = page_start + num_pages;
const auto pages_interval =
decltype(cached_pages)::interval_type::right_open(page_start, page_end);
// Increments are applied before the walk, so `count == delta` below identifies intervals
// that were at zero before this call.
if (delta > 0) {
cached_pages.add({pages_interval, delta});
}
const auto& range = cached_pages.equal_range(pages_interval);
// The structured binding named `range` shadows the iterator pair above inside the loop body.
for (const auto& [range, count] : boost::make_iterator_range(range)) {
// Clip the stored interval to the pages this call is about.
const auto interval = range & pages_interval;
const VAddr interval_start_addr = boost::icl::first(interval) << PageBits;
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageBits;
const u32 interval_size = interval_end_addr - interval_start_addr;
#ifndef _WIN64
// Shadows the `addr` parameter from here to the end of the loop body.
void* addr = reinterpret_cast<void*>(interval_start_addr);
if (delta > 0 && count == delta) {
mprotect(addr, interval_size, PROT_NONE);
} else if (delta < 0 && count == -delta) {
mprotect(addr, interval_size, PROT_READ | PROT_WRITE);
} else {
ASSERT(count >= 0);
}
#endif
}
// Decrements are applied after the walk, so `count == -delta` above identifies intervals
// that drop to zero with this call.
if (delta < 0) {
cached_pages.add({pages_interval, delta});
}
}
} // namespace VideoCore

View file

@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <forward_list>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/slot_vector.h"
// Forward declaration of the type used by TextureCache::FindDisplayBuffer. It has to live in
// the namespace the rest of the code refers to (Libraries::VideoOut), otherwise it declares an
// unrelated type.
namespace Libraries::VideoOut {
struct BufferAttributeGroup;
}
namespace VideoCore {
/// Caches guest images as Vulkan images and keeps them coherent with guest memory.
/// Images are indexed by the guest pages they touch (pages are 1 << PageBits bytes).
class TextureCache {
    static constexpr u64 PageBits = 14;

public:
    explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
    ~TextureCache();

    /// Invalidates any image in the logical page range.
    void OnCpuWrite(VAddr address);

    /// Retrieves the image handle of the image with the provided attributes and address.
    Image& FindDisplayBuffer(const Libraries::VideoOut::BufferAttributeGroup& attribute,
                             VAddr cpu_address);

private:
    /// Iterate over all page indices in a range.
    /// If `func` returns bool, returning true stops the iteration early.
    template <typename Func>
    static void ForEachPage(PAddr addr, size_t size, Func&& func) {
        // Compare the *result type* of the call with bool, not the invoke_result trait itself.
        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
        const u64 page_end = (addr + size - 1) >> PageBits;
        for (u64 page = addr >> PageBits; page <= page_end; ++page) {
            if constexpr (RETURNS_BOOL) {
                if (func(page)) {
                    break;
                }
            } else {
                func(page);
            }
        }
    }

    /// Calls `func(image_id, image)` once for every image registered on a page of the region.
    /// Images spanning several pages are visited only once: they are flagged Picked while the
    /// walk is in progress and the flag is cleared again before returning.
    /// If `func` returns bool, returning true stops the walk.
    template <typename Func>
    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
        using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        boost::container::small_vector<ImageId, 32> images;
        ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
            const auto it = page_table.find(page);
            if (it == page_table.end()) {
                if constexpr (BOOL_BREAK) {
                    return false;
                } else {
                    return;
                }
            }
            for (const ImageId image_id : it->second) {
                Image& image = slot_images[image_id];
                if (image.flags & ImageFlagBits::Picked) {
                    continue;
                }
                image.flags |= ImageFlagBits::Picked;
                images.push_back(image_id);
                if constexpr (BOOL_BREAK) {
                    if (func(image_id, image)) {
                        return true;
                    }
                } else {
                    func(image_id, image);
                }
            }
            if constexpr (BOOL_BREAK) {
                return false;
            }
        });
        for (const ImageId image_id : images) {
            slot_images[image_id].flags &= ~ImageFlagBits::Picked;
        }
    }

    /// Create an image from the given parameters
    [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);

    /// Reuploads image contents.
    void RefreshImage(Image& image);

    /// Register image in the page table
    void RegisterImage(ImageId image);

    /// Unregister image from the page table
    void UnregisterImage(ImageId image);

    /// Track CPU reads and writes for image
    void TrackImage(Image& image, ImageId image_id);

    /// Stop tracking CPU reads and writes for image
    void UntrackImage(Image& image, ImageId image_id);

    /// Increase/decrease the number of surface in pages touching the specified region
    void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta);

private:
    const Vulkan::Instance& instance;
    Vulkan::Scheduler& scheduler;
    Vulkan::StreamBuffer staging;                            ///< Upload buffer for image data
    SlotVector<Image> slot_images;                           ///< Storage of all cached images
    tsl::robin_pg_map<u64, std::vector<ImageId>> page_table; ///< Page index -> images on it
    boost::icl::interval_map<VAddr, s32> cached_pages;       ///< Page index -> tracking count
};
} // namespace VideoCore

View file

@ -0,0 +1,164 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "video_core/texture_cache/tile_manager.h"
namespace VideoCore {
/// Floor of log2(i). The low bit is forced on before counting leading zeros, so an input of
/// 0 yields 0 instead of hitting the undefined clz(0) case.
static u32 IntLog2(u32 i) {
    const u32 nonzero = i | 1u;
    const int leading_zeros = __builtin_clz(nonzero);
    return 31 - leading_zeros;
}
/// Address calculator for 32-bit-per-pixel tiled surfaces.
/// Init() selects the tiling parameters for base or Neo mode; getTiledOffs() then maps a
/// pixel coordinate to its byte offset inside the tiled surface.
class TileManager32 {
public:
    u32 m_macro_tile_height = 0;
    u32 m_bank_height = 0;
    u32 m_num_banks = 0;
    u32 m_num_pipes = 0;
    u32 m_padded_width = 0;
    u32 m_padded_height = 0;
    u32 m_pipe_bits = 0;
    u32 m_bank_bits = 0;

    /// Sets up the tiling parameters for a surface of `width` x `height` pixels.
    /// The padded size is the size rounded up to whole macro tiles (128 pixels wide and
    /// m_macro_tile_height rows tall). For 1080 rows this gives 1088 (base) or 1152 (Neo)
    /// and for 720 rows it gives 768, matching the values that used to be hard-coded.
    void Init(u32 width, u32 height, bool is_neo) {
        m_macro_tile_height = (is_neo ? 128 : 64);
        m_bank_height = is_neo ? 2 : 1;
        m_num_banks = is_neo ? 8 : 16;
        m_num_pipes = is_neo ? 16 : 8;
        m_padded_width = (width + 127) & ~127u;
        m_padded_height = (height + (m_macro_tile_height - 1)) & ~(m_macro_tile_height - 1);
        m_pipe_bits = is_neo ? 4 : 3;
        m_bank_bits = is_neo ? 3 : 4;
    }

    /// Index of a pixel inside its 8x8 micro tile, built by interleaving the low three bits
    /// of x and y in the order x0, x1, y0, x2, y1, y2.
    static u32 getElementIdx(u32 x, u32 y) {
        u32 elem = 0;
        elem |= ((x >> 0u) & 0x1u) << 0u;
        elem |= ((x >> 1u) & 0x1u) << 1u;
        elem |= ((y >> 0u) & 0x1u) << 2u;
        elem |= ((x >> 2u) & 0x1u) << 3u;
        elem |= ((y >> 1u) & 0x1u) << 4u;
        elem |= ((y >> 2u) & 0x1u) << 5u;
        return elem;
    }

    /// Pipe index of the pixel: three bits in base mode, plus a fourth bit in Neo mode.
    static u32 getPipeIdx(u32 x, u32 y, bool is_neo) {
        u32 pipe = 0;
        pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
        pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
        pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
        if (is_neo) {
            pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
        }
        return pipe;
    }

    /// Bank index of the pixel for 8 or 16 banks; any other bank count yields 0.
    static u32 getBankIdx(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks,
                          u32 num_pipes) {
        const u32 x_shift_offset = IntLog2(bank_width * num_pipes);
        const u32 y_shift_offset = IntLog2(bank_height);
        const u32 xs = x >> x_shift_offset;
        const u32 ys = y >> y_shift_offset;
        u32 bank = 0;
        switch (num_banks) {
        case 8:
            bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
            bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
            bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
            break;
        case 16:
            bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
            bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
            bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
            bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
            break;
        default:;
        }

        return bank;
    }

    /// Byte offset of pixel (x, y) inside the tiled surface. Init() must have been called.
    u64 getTiledOffs(u32 x, u32 y, bool is_neo) const {
        u64 element_index = getElementIdx(x, y);

        u32 xh = x;
        u32 yh = y;
        u64 pipe = getPipeIdx(xh, yh, is_neo);
        u64 bank = getBankIdx(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);
        // One 8x8 micro tile of 32-bit pixels: 256 bytes.
        u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
        // Offsets are tracked in bits until the final division.
        u64 element_offset = (element_index * 32);
        u64 tile_split_slice = 0;

        // Never taken for 256-byte tiles; kept so the formula stays complete.
        if (tile_bytes > 512) {
            tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
            element_offset %= (static_cast<u64>(512) * 8);
            tile_bytes = 512;
        }

        u64 macro_tile_bytes =
            (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
        u64 macro_tiles_per_row = m_padded_width / 128;
        u64 macro_tile_row_index = y / m_macro_tile_height;
        u64 macro_tile_column_index = x / 128;
        u64 macro_tile_index =
            (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
        u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
        u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
        u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
        u64 slice_offset = tile_split_slice * slice_bytes;
        u64 tile_row_index = (y / 8) % m_bank_height;
        u64 tile_index = tile_row_index;
        u64 tile_offset = tile_index * tile_bytes;

        u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
        bank ^= tile_split_slice_rotation;
        bank &= (m_num_banks - 1);

        u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
        u64 bit_offset = total_offset & 0x7u;
        total_offset /= 8;

        // Final address layout, low to high: 8 bits of pipe interleave, pipe index, bank
        // index, then the remaining offset bits.
        u64 pipe_interleave_offset = total_offset & 0xffu;
        u64 offset = total_offset >> 8u;
        u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) |
                          (offset << (8u + m_pipe_bits + m_bank_bits));

        return ((byte_offset << 3u) | bit_offset) / 8;
    }
};
/// Converts a tiled 32-bit-per-pixel surface into a tightly packed linear image.
/// @param dst    Destination buffer; receives width * height * 4 bytes.
/// @param src    Start of the tiled source surface.
/// @param width  Width of the image in pixels.
/// @param height Height of the image in pixels.
/// @param is_neo Selects the Neo-mode tiling parameters instead of the base-mode ones.
void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_neo) {
    TileManager32 t;
    t.Init(width, height, is_neo);

    for (u32 y = 0; y < height; y++) {
        u32 x = 0;
        // Widen before multiplying so the row offset cannot wrap in 32-bit arithmetic.
        u64 linear_offset = static_cast<u64>(y) * width * 4;

        // Copy two pixels at a time: the lowest bit of the element index is the lowest bit of
        // x, so the pixels at an even x and at x + 1 are adjacent in the tiled surface.
        for (; x + 1 < width; x += 2) {
            auto tiled_offset = t.getTiledOffs(x, y, is_neo);

            std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
            linear_offset += 8;
        }
        // Odd widths leave one trailing pixel per row.
        if (x < width) {
            auto tiled_offset = t.getTiledOffs(x, y, is_neo);
            std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
        }
    }
}
} // namespace VideoCore

View file

@ -0,0 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace VideoCore {
/// Converts tiled texture data to linear format.
/// Pixels are 32 bits wide; `dst` receives `width * height * 4` tightly packed bytes and `neo`
/// selects the Neo-mode tiling layout instead of the base-mode one.
void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool neo);
} // namespace VideoCore

View file

@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "video_core/texture_cache/slot_vector.h"
namespace VideoCore {
// Both ids are plain SlotId aliases, so the compiler does not tell them apart.
using ImageId = SlotId;
using ImageViewId = SlotId;
/// Signed two-component offset. Members have no default initializers.
struct Offset2D {
s32 x;
s32 y;
};
/// Signed three-component offset. Members have no default initializers.
struct Offset3D {
s32 x;
s32 y;
s32 z;
};
/// Two-dimensional region given by a start and an end offset.
// NOTE(review): whether `end` is inclusive or exclusive is not visible here; confirm at use.
struct Region2D {
Offset2D start;
Offset2D end;
};
/// Unsigned two-dimensional size. Members have no default initializers.
struct Extent2D {
u32 width;
u32 height;
};
/// Unsigned three-dimensional size. Members have no default initializers.
struct Extent3D {
u32 width;
u32 height;
u32 depth;
};
/// Selects a run of array layers at one mip level; defaults to level 0, layer 0, one layer.
struct SubresourceLayers {
s32 base_level = 0;
s32 base_layer = 0;
s32 num_layers = 1;
};
/// First mip level and array layer of a subresource range; defaults to 0 / 0.
struct SubresourceBase {
s32 level = 0;
s32 layer = 0;
};
/// Number of mip levels and array layers; defaults to one of each.
struct SubresourceExtent {
s32 levels = 1;
s32 layers = 1;
};
/// Range of subresources: a starting level/layer plus the number of levels/layers.
struct SubresourceRange {
SubresourceBase base;
SubresourceExtent extent;
};
/// Parameters of an image-to-image copy: source and destination subresources, the offset
/// inside each of them and the size of the copied region.
struct ImageCopy {
SubresourceLayers src_subresource;
SubresourceLayers dst_subresource;
Offset3D src_offset;
Offset3D dst_offset;
Extent3D extent;
};
/// Parameters of a copy between a buffer and an image: the buffer offset and size, the
/// buffer's row length and image height, and the image subresource, offset and extent.
// NOTE(review): units of buffer_row_length / buffer_image_height (texels vs. bytes) are not
// visible here; confirm against the code consuming this struct.
struct BufferImageCopy {
std::size_t buffer_offset;
std::size_t buffer_size;
u32 buffer_row_length;
u32 buffer_image_height;
SubresourceLayers image_subresource;
Offset3D image_offset;
Extent3D image_extent;
};
/// Parameters of a buffer-to-buffer copy: source offset, destination offset and size.
struct BufferCopy {
u64 src_offset;
u64 dst_offset;
std::size_t size;
};
} // namespace VideoCore