async shaders
This commit is contained in:
parent
c783cf443e
commit
468bd9c1b0
16 changed files with 598 additions and 64 deletions
|
@ -233,6 +233,8 @@ Device::Device()
|
|||
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
|
||||
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
|
||||
|
||||
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders;
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
||||
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
||||
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
|
||||
|
|
|
@ -104,6 +104,10 @@ public:
|
|||
return use_assembly_shaders;
|
||||
}
|
||||
|
||||
/// Returns the device's use_asynchronous_shaders flag.
bool UseAsynchronousShaders() const {
|
||||
return use_asynchronous_shaders;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestPreciseBug();
|
||||
|
@ -127,6 +131,7 @@ private:
|
|||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool use_assembly_shaders{};
|
||||
bool use_asynchronous_shaders{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -149,7 +149,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
|||
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
|
||||
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
|
||||
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
|
||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
|
||||
async_shaders{emu_window} {
|
||||
CheckExtensions();
|
||||
|
||||
unified_uniform_buffer.Create();
|
||||
|
@ -162,6 +163,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
|||
nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
// Max worker threads we should allow
|
||||
constexpr auto MAX_THREADS = 8u;
|
||||
// Amount of threads we should reserve for other parts of yuzu
|
||||
constexpr auto RESERVED_THREADS = 6u;
|
||||
// Get the number of threads we can use (hardware_concurrency() can return zero)
|
||||
const auto cpu_thread_count =
|
||||
std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
|
||||
// Deduce how many "extra" threads we have to use.
|
||||
const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
|
||||
// Always allow at least 1 thread regardless of our settings
|
||||
const auto max_worker_count = std::max(1u, max_threads_unused);
|
||||
// Don't use more than MAX_THREADS
|
||||
const auto worker_count = std::min(max_worker_count, MAX_THREADS);
|
||||
async_shaders.AllocateWorkers(worker_count);
|
||||
}
|
||||
}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() {
|
||||
|
@ -336,7 +354,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
continue;
|
||||
}
|
||||
|
||||
Shader* const shader = shader_cache.GetStageProgram(program);
|
||||
Shader* shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
||||
|
@ -353,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
SetupDrawTextures(stage, shader);
|
||||
SetupDrawImages(stage, shader);
|
||||
|
||||
const GLuint program_handle = shader->GetHandle();
|
||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core {
|
||||
|
@ -91,6 +92,14 @@ public:
|
|||
return num_queued_commands > 0;
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the rasterizer's async_shaders member.
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
|
||||
return async_shaders;
|
||||
}
|
||||
|
||||
/// Returns a read-only reference to the rasterizer's async_shaders member.
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
|
||||
return async_shaders;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Configures the color and depth framebuffer states.
|
||||
void ConfigureFramebuffers();
|
||||
|
@ -242,6 +251,7 @@ private:
|
|||
ScreenInfo& screen_info;
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
|
||||
|
|
|
@ -177,6 +177,12 @@ public:
|
|||
Release();
|
||||
}
|
||||
|
||||
/// Move-assigns from `o`: releases whatever this object currently holds via Release(),
/// then takes over `o`'s handle and leaves `o` with a zero handle.
/// Self-assignment is a no-op; without the guard Release() would run on the very
/// handle that is about to be "moved in", leaving this object with a zero handle.
OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
    if (this != &o) {
        Release();
        handle = std::exchange(o.handle, 0);
    }
    return *this;
}
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
|||
return registry;
|
||||
}
|
||||
|
||||
std::unordered_set<GLenum> GetSupportedFormats() {
|
||||
GLint num_formats;
|
||||
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
|
||||
|
||||
std::vector<GLint> formats(num_formats);
|
||||
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
|
||||
|
||||
std::unordered_set<GLenum> supported_formats;
|
||||
for (const GLint format : formats) {
|
||||
supported_formats.insert(static_cast<GLenum>(format));
|
||||
}
|
||||
return supported_formats;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
|
||||
const ShaderIR& ir, const Registry& registry,
|
||||
bool hint_retrievable = false) {
|
||||
const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
|
||||
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
|
||||
LOG_INFO(Render_OpenGL, "{}", shader_id);
|
||||
|
||||
|
@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
|
|||
return program;
|
||||
}
|
||||
|
||||
std::unordered_set<GLenum> GetSupportedFormats() {
|
||||
GLint num_formats;
|
||||
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
|
||||
|
||||
std::vector<GLint> formats(num_formats);
|
||||
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
|
||||
|
||||
std::unordered_set<GLenum> supported_formats;
|
||||
for (const GLint format : formats) {
|
||||
supported_formats.insert(static_cast<GLenum>(format));
|
||||
}
|
||||
return supported_formats;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
|
||||
ProgramSharedPtr program_)
|
||||
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
|
||||
ProgramSharedPtr program_, bool is_built)
|
||||
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
|
||||
is_built(is_built) {
|
||||
handle = program->assembly_program.handle;
|
||||
if (handle == 0) {
|
||||
handle = program->source_program.handle;
|
||||
}
|
||||
ASSERT(handle != 0);
|
||||
if (is_built) {
|
||||
ASSERT(handle != 0);
|
||||
}
|
||||
}
|
||||
|
||||
Shader::~Shader() = default;
|
||||
|
@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const {
|
|||
return handle;
|
||||
}
|
||||
|
||||
std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode code, ProgramCode code_b) {
|
||||
/// Returns the is_built flag, which the constructor initializes and which
/// AsyncOpenGLBuilt / AsyncGLASMBuilt set to true.
bool Shader::IsBuilt() const {
|
||||
return is_built;
|
||||
}
|
||||
|
||||
/// Installs `new_program` as this shader's source program, points `handle` at it and
/// flags the shader as built.
void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
    auto& source_slot = program->source_program;
    source_slot = std::move(new_program);
    handle = source_slot.handle;
    is_built = true;
}
|
||||
|
||||
/// Installs `new_program` as this shader's assembly program, points `handle` at it and
/// flags the shader as built.
void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
    auto& assembly_slot = program->assembly_program;
    assembly_slot = std::move(new_program);
    handle = assembly_slot.handle;
    is_built = true;
}
|
||||
|
||||
std::unique_ptr<Shader> Shader::CreateStageFromMemory(
|
||||
const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
|
||||
ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
|
||||
const auto shader_type = GetShaderType(program_type);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
|
||||
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
// TODO(Rodrigo): Handle VertexA shaders
|
||||
// std::optional<ShaderIR> ir_b;
|
||||
// if (!code_b.empty()) {
|
||||
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
|
||||
// }
|
||||
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
|
||||
auto& gpu = params.system.GPU();
|
||||
gpu.ShaderNotify().MarkSharderBuilding();
|
||||
|
||||
ShaderDiskCacheEntry entry;
|
||||
entry.type = shader_type;
|
||||
entry.code = std::move(code);
|
||||
entry.code_b = std::move(code_b);
|
||||
entry.unique_identifier = params.unique_identifier;
|
||||
entry.bound_buffer = registry->GetBoundBuffer();
|
||||
entry.graphics_info = registry->GetGraphicsInfo();
|
||||
entry.keys = registry->GetKeys();
|
||||
entry.bound_samplers = registry->GetBoundSamplers();
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
|
||||
if (!async_shaders.IsShaderAsync(params.system.GPU()) ||
|
||||
!params.device.UseAsynchronousShaders()) {
|
||||
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
// TODO(Rodrigo): Handle VertexA shaders
|
||||
// std::optional<ShaderIR> ir_b;
|
||||
// if (!code_b.empty()) {
|
||||
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
|
||||
// }
|
||||
auto program =
|
||||
BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
|
||||
ShaderDiskCacheEntry entry;
|
||||
entry.type = shader_type;
|
||||
entry.code = std::move(code);
|
||||
entry.code_b = std::move(code_b);
|
||||
entry.unique_identifier = params.unique_identifier;
|
||||
entry.bound_buffer = registry->GetBoundBuffer();
|
||||
entry.graphics_info = registry->GetGraphicsInfo();
|
||||
entry.keys = registry->GetKeys();
|
||||
entry.bound_samplers = registry->GetBoundSamplers();
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::unique_ptr<Shader>(new Shader(
|
||||
std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
|
||||
gpu.ShaderNotify().MarkShaderComplete();
|
||||
|
||||
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||
MakeEntries(params.device, ir, shader_type),
|
||||
std::move(program), true));
|
||||
} else {
|
||||
// Required for entries
|
||||
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
auto entries = MakeEntries(params.device, ir, shader_type);
|
||||
|
||||
async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
|
||||
std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
|
||||
COMPILER_SETTINGS, *registry, cpu_addr);
|
||||
|
||||
auto program = std::make_shared<ProgramHandle>();
|
||||
return std::unique_ptr<Shader>(
|
||||
new Shader(std::move(registry), std::move(entries), std::move(program), false));
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code) {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto& engine = params.system.GPU().KeplerCompute();
|
||||
auto& gpu = params.system.GPU();
|
||||
gpu.ShaderNotify().MarkSharderBuilding();
|
||||
|
||||
auto& engine = gpu.KeplerCompute();
|
||||
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
|
||||
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
const u64 uid = params.unique_identifier;
|
||||
|
@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p
|
|||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
gpu.ShaderNotify().MarkShaderComplete();
|
||||
|
||||
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||
MakeEntries(params.device, ir, ShaderType::Compute),
|
||||
std::move(program)));
|
||||
|
@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
|||
return program;
|
||||
}
|
||||
|
||||
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders) {
|
||||
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
|
||||
return last_shaders[static_cast<std::size_t>(program)];
|
||||
auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
|
||||
if (last_shader->IsBuilt()) {
|
||||
return last_shader;
|
||||
}
|
||||
}
|
||||
|
||||
auto& memory_manager{system.GPU().MemoryManager()};
|
||||
const GPUVAddr address{GetShaderAddress(system, program)};
|
||||
|
||||
if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
|
||||
auto completed_work = async_shaders.GetCompletedWork();
|
||||
for (auto& work : completed_work) {
|
||||
Shader* shader = TryGet(work.cpu_address);
|
||||
auto& gpu = system.GPU();
|
||||
gpu.ShaderNotify().MarkShaderComplete();
|
||||
if (shader == nullptr) {
|
||||
continue;
|
||||
}
|
||||
using namespace VideoCommon::Shader;
|
||||
if (work.backend == AsyncShaders::Backend::OpenGL) {
|
||||
shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
|
||||
} else if (work.backend == AsyncShaders::Backend::GLASM) {
|
||||
shader->AsyncGLASMBuilt(std::move(work.program.glasm));
|
||||
}
|
||||
|
||||
ShaderDiskCacheEntry entry;
|
||||
entry.type = work.shader_type;
|
||||
entry.code = std::move(work.code);
|
||||
entry.code_b = std::move(work.code_b);
|
||||
entry.unique_identifier = work.uid;
|
||||
|
||||
auto& registry = shader->GetRegistry();
|
||||
|
||||
entry.bound_buffer = registry.GetBoundBuffer();
|
||||
entry.graphics_info = registry.GetGraphicsInfo();
|
||||
entry.keys = registry.GetKeys();
|
||||
entry.bound_samplers = registry.GetBoundSamplers();
|
||||
entry.bindless_samplers = registry.GetBindlessSamplers();
|
||||
disk_cache.SaveEntry(std::move(entry));
|
||||
}
|
||||
}
|
||||
|
||||
// Look up shader in the cache based on address
|
||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
||||
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
|
||||
|
@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
|||
std::unique_ptr<Shader> shader;
|
||||
const auto found = runtime_cache.find(unique_identifier);
|
||||
if (found == runtime_cache.end()) {
|
||||
shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
|
||||
shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
|
||||
async_shaders, cpu_addr.value_or(0));
|
||||
} else {
|
||||
shader = Shader::CreateFromCache(params, found->second);
|
||||
}
|
||||
|
|
|
@ -33,6 +33,10 @@ namespace Core::Frontend {
|
|||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class AsyncShaders;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
@ -61,6 +65,11 @@ struct ShaderParameters {
|
|||
u64 unique_identifier;
|
||||
};
|
||||
|
||||
/// Builds the program for a shader of type `shader_type` from its IR and registry and
/// returns it as a ProgramSharedPtr. `unique_identifier` is used to form the shader ID.
/// NOTE(review): the effect of `hint_retrievable` is not visible from this declaration;
/// it defaults to false - confirm against the definition.
ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
|
||||
u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
bool hint_retrievable = false);
|
||||
|
||||
class Shader final {
|
||||
public:
|
||||
~Shader();
|
||||
|
@ -68,15 +77,28 @@ public:
|
|||
/// Gets the GL program handle for the shader
|
||||
GLuint GetHandle() const;
|
||||
|
||||
bool IsBuilt() const;
|
||||
|
||||
/// Gets the shader entries for the shader
|
||||
const ShaderEntries& GetEntries() const {
|
||||
return entries;
|
||||
}
|
||||
|
||||
static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code,
|
||||
ProgramCode program_code_b);
|
||||
const VideoCommon::Shader::Registry& GetRegistry() const {
|
||||
return *registry;
|
||||
}
|
||||
|
||||
/// Mark an OpenGL shader as built
|
||||
void AsyncOpenGLBuilt(OGLProgram new_program);
|
||||
|
||||
/// Mark a GLASM shader as built
|
||||
void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
|
||||
|
||||
static std::unique_ptr<Shader> CreateStageFromMemory(
|
||||
const ShaderParameters& params, Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code, ProgramCode program_code_b,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
|
||||
|
||||
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code);
|
||||
|
||||
|
@ -85,12 +107,13 @@ public:
|
|||
|
||||
private:
|
||||
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
|
||||
ProgramSharedPtr program);
|
||||
ProgramSharedPtr program, bool is_built = true);
|
||||
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
ShaderEntries entries;
|
||||
ProgramSharedPtr program;
|
||||
GLuint handle = 0;
|
||||
bool is_built{};
|
||||
};
|
||||
|
||||
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
|
||||
|
@ -104,7 +127,8 @@ public:
|
|||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
/// Gets the current specified shader stage program
|
||||
Shader* GetStageProgram(Maxwell::ShaderProgram program);
|
||||
Shader* GetStageProgram(Maxwell::ShaderProgram program,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders);
|
||||
|
||||
/// Gets a compute kernel in the passed address
|
||||
Shader* GetComputeKernel(GPUVAddr code_addr);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue