video_core: Crucial buffer cache fixes + proper GPU clears (#414)

* translator: Use templates for stronger type guarantees

* spirv: Define buffer offsets upfront

* Saves a lot of shader instructions

* buffer_cache: Use dynamic vertex input when available

* Fixes issues when games like dark souls rebind vertex buffers with different stride

* externals: Update boost

* spirv: Use runtime array for ssbos

* ssbos can be large and typically their size will vary, especially in generic copy/clear cs shaders

* fs: Lock when doing case insensitive search

* Dark Souls does fs lookups from different threads

* texture_cache: More precise invalidation from compute

* Fixes unrelated render targets being cleared

* texture_cache: Use hashes for protect gpu modified images from reupload

* translator: Treat V_CNDMASK as float

* Sometimes it can have input modifiers. Worst this will cause is some extra calls to uintBitsToFloat and opposite. But most often this is used as float anyway

* translator: Small optimization for V_SAD_U32

* Fix review

* clang format
This commit is contained in:
TheTurtle 2024-08-13 09:21:48 +03:00 committed by GitHub
parent dfcfd62d4f
commit 1fb0da9b89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 372 additions and 346 deletions

View file

@ -47,7 +47,7 @@ public:
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address, bool is_eop) {
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info, false);
const auto image_id = texture_cache.FindImage(info);
auto& image = texture_cache.GetImage(image_id);
return PrepareFrameInternal(image, is_eop);
}
@ -61,7 +61,7 @@ public:
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address);
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info, false);
const auto image_id = texture_cache.FindImage(info);
return texture_cache.GetImage(image_id);
}

View file

@ -96,7 +96,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
Shader::PushData push_data{};
u32 binding{};
for (u32 i = 0; const auto& buffer : info.buffers) {
for (const auto& buffer : info.buffers) {
const auto vsharp = buffer.GetVsharp(info);
const VAddr address = vsharp.base_address;
// Most of the time when a metadata is updated with a shader it gets cleared. It means we
@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
}
const u32 size = vsharp.GetSize();
if (buffer.is_written) {
texture_cache.InvalidateMemory(address, size);
texture_cache.InvalidateMemory(address, size, true);
}
const u32 alignment =
buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
@ -137,7 +137,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
i++;
}
for (const auto& image_desc : info.images) {

View file

@ -145,6 +145,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT);
dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT);
}
if (instance.IsVertexInputDynamicState()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
}
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),

View file

@ -202,6 +202,8 @@ bool Instance::CreateDevice() {
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
workgroup_memory_explicit_layout =
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
@ -319,6 +321,9 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceSynchronization2Features{
.synchronization2 = true,
},
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
.vertexInputDynamicState = true,
},
};
if (!color_write_en) {
@ -331,8 +336,8 @@ bool Instance::CreateDevice() {
} else {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
if (!has_sync2) {
device_chain.unlink<vk::PhysicalDeviceSynchronization2Features>();
if (!vertex_input_dynamic_state) {
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
}
try {

View file

@ -132,6 +132,11 @@ public:
return color_write_en;
}
/// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
bool IsVertexInputDynamicState() const {
return vertex_input_dynamic_state;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;
@ -257,6 +262,7 @@ private:
bool external_memory_host{};
bool workgroup_memory_explicit_layout{};
bool color_write_en{};
bool vertex_input_dynamic_state{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};

View file

@ -209,6 +209,10 @@ void PipelineCache::RefreshGraphicsKey() {
continue;
}
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
if (!bininfo->Valid()) {
key.stage_hashes[i] = 0;
continue;
}
key.stage_hashes[i] = bininfo->shader_hash;
}
}