video_core: Crucial buffer cache fixes + proper GPU clears (#414)

* translator: Use templates for stronger type guarantees

* spirv: Define buffer offsets upfront

* Saves a lot of shader instructions

* buffer_cache: Use dynamic vertex input when available

* Fixes issues when games like Dark Souls rebind vertex buffers with a different stride

* externals: Update boost

* spirv: Use runtime array for ssbos

* SSBOs can be large and their size typically varies, especially in generic copy/clear compute shaders

* fs: Lock when doing case insensitive search

* Dark Souls does fs lookups from different threads

* texture_cache: More precise invalidation from compute

* Fixes unrelated render targets being cleared

* texture_cache: Use hashes to protect GPU-modified images from reupload

* translator: Treat V_CNDMASK as float

* It can sometimes have input modifiers. At worst this causes some extra calls to uintBitsToFloat and its inverse (floatBitsToUint), but it is most often used as a float anyway

* translator: Small optimization for V_SAD_U32

* Fix review

* clang format
This commit is contained in:
TheTurtle 2024-08-13 09:21:48 +03:00 committed by GitHub
parent dfcfd62d4f
commit 1fb0da9b89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 372 additions and 346 deletions

View file

@ -165,14 +165,18 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id EmitContext::GetBufferOffset(u32 binding) {
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
// For every entry in `buffers`, emits the SPIR-V that reads that buffer's 8-bit offset field
// from the push constant block and stores the resulting ids on the entry itself
// (`buffer.offset` and `buffer.offset_dwords`).
// NOTE(review): presumably this runs once during context setup so that later buffer accesses
// can reuse the stored ids instead of re-emitting the load/extract sequence - confirm at the
// call site.
void EmitContext::DefineBufferOffsets() {
for (auto& buffer : buffers) {
const u32 binding = buffer.binding;
// Each binding owns one 8-bit field: four fields per 32-bit component and, since `comp`
// below ranges over 0..3, sixteen bindings per first-level index into the push data block.
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
// Second access-chain index (0..3): which 32-bit component holds this binding's field.
const u32 comp = (binding & 0xf) >> 2;
// Bit position of the field inside that component: 0, 8, 16 or 24.
const u32 offset = (binding & 0x3) << 3;
// Pointer to the selected 32-bit word in push constant storage.
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
// Unsigned extract of the 8 bits starting at `offset`; the result is at most 255.
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
// The same value shifted right by two (divided by four).
// NOTE(review): the name suggests `offset` is in bytes and this is its 32-bit-word
// equivalent - confirm against the code that fills the push data.
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
}
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
@ -327,7 +331,9 @@ void EmitContext::DefineBuffers() {
for (u32 i = 0; const auto& buffer : info.buffers) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
const Id data_type = (*data_types)[1];
const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))};
const Id record_array_type{buffer.is_storage
? TypeRuntimeArray(data_type)
: TypeArray(data_type, ConstU32(buffer.length))};
const Id struct_type{TypeStruct(record_array_type)};
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
@ -354,7 +360,7 @@ void EmitContext::DefineBuffers() {
buffers.push_back({
.id = id,
.global_binding = binding++,
.binding = binding++,
.data_types = data_types,
.pointer_type = pointer_type,
.buffer = buffer.GetVsharp(info),