shader/texture: Join separate image and sampler pairs offline
Games using D3D idioms can join images and samplers when a shader executes, instead of baking them into a combined sampler image. This is also possible on Vulkan. One approach to this solution would be to use separate samplers on Vulkan and leave this unimplemented on OpenGL, but we can't do this because there's no consistent way of determining which constant buffer holds a sampler and which one an image. We could in theory find the first bit and if it's in the TIC area, it's an image; but this falls apart when an image or sampler handle use an index of zero. The used approach is to track for a LOP.OR operation (this is done at an IR level, not at an ISA level), track again the constant buffers used as source and store this pair. Then, outside of shader execution, join the sample and image pair with a bitwise or operation. This approach won't work on games that truly use separate samplers in a meaningful way. For example, pooling textures in a 2D array and determining at runtime what sampler to use. This invalidates OpenGL's disk shader cache :) - Used mostly by D3D ports to Switch
This commit is contained in:
parent
e1438f8e91
commit
5b2b6d594c
16 changed files with 235 additions and 89 deletions
|
@ -65,10 +65,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
|
|||
template <typename Engine, typename Entry>
|
||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
ShaderType shader_type, std::size_t index = 0) {
|
||||
if (entry.is_bindless) {
|
||||
const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(tex_handle);
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
const u32 buffer_2 = entry.secondary_buffer;
|
||||
const u32 offset_1 = entry.offset;
|
||||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(handle);
|
||||
}
|
||||
|
||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||
|
|
|
@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
|
|||
|
||||
namespace {
|
||||
|
||||
using VideoCommon::Shader::SeparateSamplerKey;
|
||||
|
||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||
|
||||
struct ConstBufferKey {
|
||||
|
@ -37,18 +39,26 @@ struct ConstBufferKey {
|
|||
u32 value = 0;
|
||||
};
|
||||
|
||||
struct BoundSamplerKey {
|
||||
struct BoundSamplerEntry {
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerKey {
|
||||
struct SeparateSamplerEntry {
|
||||
u32 cbuf1 = 0;
|
||||
u32 cbuf2 = 0;
|
||||
u32 offset1 = 0;
|
||||
u32 offset2 = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerEntry {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 20;
|
||||
constexpr u32 NativeVersion = 21;
|
||||
|
||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
ShaderCacheVersionHash hash{};
|
||||
|
@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
|||
u32 texture_handler_size_value;
|
||||
u32 num_keys;
|
||||
u32 num_bound_samplers;
|
||||
u32 num_separate_samplers;
|
||||
u32 num_bindless_samplers;
|
||||
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
||||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
||||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
||||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
||||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||
file.ReadArray(&num_separate_samplers, 1) != 1 ||
|
||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||
return false;
|
||||
}
|
||||
|
@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
|||
}
|
||||
|
||||
std::vector<ConstBufferKey> flat_keys(num_keys);
|
||||
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
|
||||
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
||||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
||||
flat_bound_samplers.size() ||
|
||||
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
|
||||
flat_separate_samplers.size() ||
|
||||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
||||
flat_bindless_samplers.size()) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& key : flat_keys) {
|
||||
keys.insert({{key.cbuf, key.offset}, key.value});
|
||||
for (const auto& entry : flat_keys) {
|
||||
keys.insert({{entry.cbuf, entry.offset}, entry.value});
|
||||
}
|
||||
for (const auto& key : flat_bound_samplers) {
|
||||
bound_samplers.emplace(key.offset, key.sampler);
|
||||
for (const auto& entry : flat_bound_samplers) {
|
||||
bound_samplers.emplace(entry.offset, entry.sampler);
|
||||
}
|
||||
for (const auto& key : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
|
||||
for (const auto& entry : flat_separate_samplers) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = {entry.cbuf1, entry.cbuf2};
|
||||
key.offsets = {entry.offset1, entry.offset2};
|
||||
separate_samplers.emplace(key, entry.sampler);
|
||||
}
|
||||
for (const auto& entry : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
|||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
||||
return false;
|
||||
}
|
||||
|
@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
|||
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
||||
}
|
||||
|
||||
std::vector<BoundSamplerKey> flat_bound_samplers;
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers;
|
||||
flat_bound_samplers.reserve(bound_samplers.size());
|
||||
for (const auto& [address, sampler] : bound_samplers) {
|
||||
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
|
||||
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerKey> flat_bindless_samplers;
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers;
|
||||
flat_separate_samplers.reserve(separate_samplers.size());
|
||||
for (const auto& [key, sampler] : separate_samplers) {
|
||||
SeparateSamplerEntry entry;
|
||||
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
|
||||
std::tie(entry.offset1, entry.offset2) = key.offsets;
|
||||
entry.sampler = sampler;
|
||||
flat_separate_samplers.push_back(entry);
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
|
||||
flat_bindless_samplers.reserve(bindless_samplers.size());
|
||||
for (const auto& [address, sampler] : bindless_samplers) {
|
||||
flat_bindless_samplers.push_back(
|
||||
BindlessSamplerKey{address.first, address.second, sampler});
|
||||
BindlessSamplerEntry{address.first, address.second, sampler});
|
||||
}
|
||||
|
||||
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
||||
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
||||
flat_bound_samplers.size() &&
|
||||
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
|
||||
flat_separate_samplers.size() &&
|
||||
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
||||
flat_bindless_samplers.size();
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
|
|||
VideoCommon::Shader::ComputeInfo compute_info;
|
||||
VideoCommon::Shader::KeyMap keys;
|
||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue