video_core: Crucial buffer cache fixes + proper GPU clears (#414)

* translator: Use templates for stronger type guarantees

* spirv: Define buffer offsets upfront

* Saves a lot of shader instructions

* buffer_cache: Use dynamic vertex input when available

* Fixes issues when games like dark souls rebind vertex buffers with different stride

* externals: Update boost

* spirv: Use runtime array for ssbos

* ssbos can be large and typically their size will vary, especially in generic copy/clear cs shaders

* fs: Lock when doing case insensitive search

* Dark Souls does fs lookups from different threads

* texture_cache: More precise invalidation from compute

* Fixes unrelated render targets being cleared

* texture_cache: Use hashes for protect gpu modified images from reupload

* translator: Treat V_CNDMASK as float

* Sometimes it can have input modifiers. Worst this will cause is some extra calls to uintBitsToFloat and opposite. But most often this is used as float anyway

* translator: Small optimization for V_SAD_U32

* Fix review

* clang format
This commit is contained in:
TheTurtle 2024-08-13 09:21:48 +03:00 committed by GitHub
parent dfcfd62d4f
commit 1fb0da9b89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 372 additions and 346 deletions

View file

@ -87,6 +87,15 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
}
bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
SCOPE_EXIT {
if (instance.IsVertexInputDynamicState()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.setVertexInputEXT(bindings, attributes);
}
};
if (vs_info.vs_inputs.empty()) {
return false;
}
@ -122,6 +131,21 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
}
guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format =
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = buffer.GetStride(),
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None
? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
.divisor = 1,
});
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
@ -224,6 +248,19 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
return {&buffer, buffer.Offset(device_addr)};
}
std::pair<const Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
const u64 page = gpu_addr >> CACHING_PAGEBITS;
const BufferId buffer_id = page_table[page];
if (buffer_id) {
const Buffer& buffer = slot_buffers[buffer_id];
if (buffer.IsInBounds(gpu_addr, size)) {
return {&buffer, buffer.Offset(gpu_addr)};
}
}
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
return {&staging_buffer, offset};
}
bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
const VAddr end_addr = addr + size;
const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
@ -248,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
return memory_tracker.IsRegionCpuModified(addr, size);
}
bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
return memory_tracker.IsRegionGpuModified(addr, size);
}
BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
if (device_addr == 0) {
return NULL_BUFFER_ID;

View file

@ -69,12 +69,18 @@ public:
/// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written);
/// Obtains a temporary buffer for usage in texture cache.
[[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
/// Return true when a region is registered on the cache
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
/// Return true when a CPU region is modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
private:
template <typename Func>
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {