shader/registry: Store graphics and compute metadata

Store information that GLSL forces us to provide but that is dynamic state
in hardware (workgroup sizes, primitive topology, shared memory size).
This commit is contained in:
ReinUsesLisp 2020-02-29 03:49:51 -03:00
parent e8efd5a901
commit 0528be5c92
8 changed files with 176 additions and 75 deletions

View file

@ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
auto registry = std::make_shared<Registry>(entry.type, guest_profile);
registry->SetBoundBuffer(entry.bound_buffer);
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info};
const auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value);
@ -184,9 +185,9 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
u64 unique_identifier, const ShaderIR& ir,
bool hint_retrievable = false) {
const Registry& registry, bool hint_retrievable = false) {
LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type));
const std::string glsl = DecompileShader(device, ir, shader_type);
const std::string glsl = DecompileShader(device, ir, registry, shader_type);
OGLShader shader;
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
@ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
// if (!code_b.empty()) {
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir);
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = shader_type;
@ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.code_b = std::move(code_b);
entry.unique_identifier = params.unique_identifier;
entry.bound_buffer = registry->GetBoundBuffer();
entry.graphics_info = registry->GetGraphicsInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
@ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto registry =
std::make_shared<Registry>(ShaderType::Compute, params.system.GPU().KeplerCompute());
auto& engine = params.system.GPU().KeplerCompute();
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = ShaderType::Compute;
entry.code = std::move(code);
entry.unique_identifier = params.unique_identifier;
entry.unique_identifier = uid;
entry.bound_buffer = registry->GetBoundBuffer();
entry.compute_info = registry->GetComputeInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
@ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
const auto& entry = (*transferable)[i];
const u64 unique_identifier = entry.unique_identifier;
const auto it = find_precompiled(unique_identifier);
const u64 uid = entry.unique_identifier;
const auto it = find_precompiled(uid);
const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
const bool is_compute = entry.type == ShaderType::Compute;
@ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
if (!program) {
// Otherwise compile it from GLSL
program = BuildShader(device, entry.type, unique_identifier, ir, true);
program = BuildShader(device, entry.type, uid, ir, *registry, true);
}
PrecompiledShader shader;

View file

@ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
@ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
}
}
/// Translates a guest primitive topology into the pair used when declaring geometry shader
/// inputs: the name placed inside the GLSL "layout (...) in;" qualifier and the vertex count
/// associated with that primitive. Topologies that are not handled are reported through
/// UNIMPLEMENTED_MSG and described as points.
std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
    using Topology = Maxwell::PrimitiveTopology;
    using Description = std::pair<const char*, u32>;

    switch (topology) {
    case Topology::Points:
        return Description{"points", 1};

    // Strips share the layout of their base primitive
    case Topology::Lines:
    case Topology::LineStrip:
        return Description{"lines", 2};

    case Topology::LinesAdjacency:
    case Topology::LineStripAdjacency:
        return Description{"lines_adjacency", 4};

    // Strips and fans share the layout of plain triangles
    case Topology::Triangles:
    case Topology::TriangleStrip:
    case Topology::TriangleFan:
        return Description{"triangles", 3};

    case Topology::TrianglesAdjacency:
    case Topology::TriangleStripAdjacency:
        return Description{"triangles_adjacency", 6};

    default:
        break;
    }

    // Unknown topology: report it and fall back to the points description
    UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
    return Description{"points", 1};
}
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(std::size_t element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
@ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) {
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
std::string_view suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view suffix)
: device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix},
header{ir.GetHeader()} {}
void Decompile() {
DeclareHeader();
DeclareVertex();
DeclareGeometry();
DeclareFragment();
DeclareCompute();
DeclareRegisters();
DeclareCustomVariables();
DeclarePredicates();
@ -489,9 +516,15 @@ private:
return;
}
const auto& info = registry.GetGraphicsInfo();
const auto input_topology = info.primitive_topology;
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
max_input_vertices = max_vertices;
code.AddLine("layout ({}) in;", glsl_topology);
const auto topology = GetTopologyName(header.common3.output_topology);
const auto max_vertices = header.common4.max_output_vertices.Value();
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
const auto max_output_vertices = header.common4.max_output_vertices.Value();
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
code.AddNewLine();
code.AddLine("in gl_PerVertex {{");
@ -513,7 +546,8 @@ private:
if (!IsRenderTargetEnabled(render_target)) {
continue;
}
code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target);
code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target,
render_target);
any = true;
}
if (any) {
@ -521,6 +555,20 @@ private:
}
}
/// Emits the declarations that only apply to compute shaders: the shared memory array (when the
/// registry reports a non-zero shared memory size) and the workgroup size layout qualifier.
/// Does nothing for any other stage.
void DeclareCompute() {
    if (stage != ShaderType::Compute) {
        return;
    }
    const auto& info = registry.GetComputeInfo();
    if (const u32 size = info.shared_memory_size_in_words; size > 0) {
        // The array holds one uint per word, so the size in words is the element count.
        // The placeholder is required: without it the size argument is dropped and an unsized
        // "smem[]" is emitted.
        code.AddLine("shared uint smem[{}];", size);
        code.AddNewLine();
    }
    // Workgroup dimensions come from the registry and are written into the GLSL source here
    code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
                 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
    code.AddNewLine();
}
void DeclareVertexRedeclarations() {
code.AddLine("out gl_PerVertex {{");
++code.scope;
@ -596,18 +644,16 @@ private:
}
void DeclareLocalMemory() {
u64 local_memory_size = 0;
if (stage == ShaderType::Compute) {
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
code.AddLine("#endif");
return;
local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
} else {
local_memory_size = header.GetLocalMemorySize();
}
const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@ -996,7 +1042,8 @@ private:
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
max_input_vertices.value());
}
return std::string(name);
};
@ -2428,11 +2475,14 @@ private:
const Device& device;
const ShaderIR& ir;
const Registry& registry;
const ShaderType stage;
const std::string_view suffix;
const Header header;
ShaderWriter code;
std::optional<u32> max_input_vertices;
};
std::string GetFlowVariable(u32 index) {
@ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
return entries;
}
std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage,
std::string_view suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view suffix) {
GLSLDecompiler decompiler(device, ir, registry, stage, suffix);
decompiler.Decompile();
return decompiler.GetResult();
}

View file

@ -12,12 +12,9 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace OpenGL {
class Device;
@ -80,6 +77,7 @@ struct ShaderEntries {
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view suffix = {});
} // namespace OpenGL

View file

@ -48,7 +48,7 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler;
};
constexpr u32 NativeVersion = 16;
constexpr u32 NativeVersion = 17;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
return false;
}
bool is_texture_handler_size_known;
u8 is_texture_handler_size_known;
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
@ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
}
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
file.WriteObject(texture_handler_size.has_value()) != 1 ||
file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {

View file

@ -51,8 +51,10 @@ struct ShaderDiskCacheEntry {
ProgramCode code_b;
u64 unique_identifier = 0;
u32 bound_buffer = 0;
std::optional<u32> texture_handler_size;
u32 bound_buffer = 0;
VideoCommon::Shader::GraphicsInfo graphics_info;
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;