Merge pull request #2601 from FernandoS27/texture_cache
Implement a new Texture Cache
This commit is contained in:
commit
772c86a260
63 changed files with 4196 additions and 3269 deletions
75
src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
Normal file
75
src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "common/cityhash.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
|
||||
|
||||
FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
|
||||
|
||||
GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
|
||||
const auto [entry, is_cache_miss] = cache.try_emplace(key);
|
||||
auto& framebuffer{entry->second};
|
||||
if (is_cache_miss) {
|
||||
framebuffer = CreateFramebuffer(key);
|
||||
}
|
||||
return framebuffer.handle;
|
||||
}
|
||||
|
||||
OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
|
||||
OGLFramebuffer framebuffer;
|
||||
framebuffer.Create();
|
||||
|
||||
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
|
||||
local_state.draw.draw_framebuffer = framebuffer.handle;
|
||||
local_state.ApplyFramebufferState();
|
||||
|
||||
if (key.is_single_buffer) {
|
||||
if (key.color_attachments[0] != GL_NONE && key.colors[0]) {
|
||||
key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER);
|
||||
glDrawBuffer(key.color_attachments[0]);
|
||||
} else {
|
||||
glDrawBuffer(GL_NONE);
|
||||
}
|
||||
} else {
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (key.colors[index]) {
|
||||
key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
|
||||
GL_DRAW_FRAMEBUFFER);
|
||||
}
|
||||
}
|
||||
glDrawBuffers(key.colors_count, key.color_attachments.data());
|
||||
}
|
||||
|
||||
if (key.zeta) {
|
||||
key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
|
||||
GL_DRAW_FRAMEBUFFER);
|
||||
}
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
std::size_t FramebufferCacheKey::Hash() const {
|
||||
static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
|
||||
return static_cast<std::size_t>(
|
||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
|
||||
}
|
||||
|
||||
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
|
||||
return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors,
|
||||
zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count,
|
||||
rhs.color_attachments, rhs.colors, rhs.zeta);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
68
src/video_core/renderer_opengl/gl_framebuffer_cache.h
Normal file
68
src/video_core/renderer_opengl/gl_framebuffer_cache.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct alignas(sizeof(u64)) FramebufferCacheKey {
|
||||
bool is_single_buffer = false;
|
||||
bool stencil_enable = false;
|
||||
u16 colors_count = 0;
|
||||
|
||||
std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
|
||||
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
|
||||
View zeta;
|
||||
|
||||
std::size_t Hash() const;
|
||||
|
||||
bool operator==(const FramebufferCacheKey& rhs) const;
|
||||
|
||||
bool operator!=(const FramebufferCacheKey& rhs) const {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::FramebufferCacheKey> {
|
||||
std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class FramebufferCacheOpenGL {
|
||||
public:
|
||||
FramebufferCacheOpenGL();
|
||||
~FramebufferCacheOpenGL();
|
||||
|
||||
GLuint GetFramebuffer(const FramebufferCacheKey& key);
|
||||
|
||||
private:
|
||||
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
|
||||
|
||||
OpenGLState local_state;
|
||||
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -29,8 +29,10 @@
|
|||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
using SurfaceType = VideoCore::Surface::SurfaceType;
|
||||
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
|
||||
|
@ -78,29 +80,9 @@ struct DrawParameters {
|
|||
}
|
||||
};
|
||||
|
||||
struct FramebufferCacheKey {
|
||||
bool is_single_buffer = false;
|
||||
bool stencil_enable = false;
|
||||
|
||||
std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{};
|
||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{};
|
||||
u32 colors_count = 0;
|
||||
|
||||
GLuint zeta = 0;
|
||||
|
||||
auto Tie() const {
|
||||
return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
|
||||
zeta);
|
||||
}
|
||||
|
||||
bool operator<(const FramebufferCacheKey& rhs) const {
|
||||
return Tie() < rhs.Tie();
|
||||
}
|
||||
};
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||
ScreenInfo& info)
|
||||
: res_cache{*this}, shader_cache{*this, system, emu_window, device},
|
||||
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
|
||||
global_cache{*this}, system{system}, screen_info{info},
|
||||
buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
||||
OpenGLState::ApplyDefaultState();
|
||||
|
@ -121,11 +103,6 @@ void RasterizerOpenGL::CheckExtensions() {
|
|||
Render_OpenGL,
|
||||
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
|
||||
}
|
||||
if (!GLAD_GL_ARB_buffer_storage) {
|
||||
LOG_WARNING(
|
||||
Render_OpenGL,
|
||||
"Buffer storage control is not supported! This can cause performance degradation.");
|
||||
}
|
||||
}
|
||||
|
||||
GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
|
@ -302,8 +279,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
|
||||
Shader shader{shader_cache.GetStageProgram(program)};
|
||||
const auto [program_handle, next_bindings] =
|
||||
shader->GetProgramHandle(primitive_mode, base_bindings);
|
||||
|
||||
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
|
||||
SetupDrawConstBuffers(stage_enum, shader);
|
||||
SetupGlobalRegions(stage_enum, shader);
|
||||
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
|
||||
|
||||
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
|
||||
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
|
||||
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
|
@ -321,11 +304,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
shader_config.enable.Value(), shader_config.offset);
|
||||
}
|
||||
|
||||
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
|
||||
SetupDrawConstBuffers(stage_enum, shader);
|
||||
SetupGlobalRegions(stage_enum, shader);
|
||||
SetupTextures(stage_enum, shader, base_bindings);
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||
|
@ -351,44 +329,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
gpu.dirty_flags.shaders = false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
|
||||
OpenGLState& current_state) {
|
||||
const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
|
||||
auto& framebuffer = entry->second;
|
||||
|
||||
if (is_cache_miss)
|
||||
framebuffer.Create();
|
||||
|
||||
current_state.draw.draw_framebuffer = framebuffer.handle;
|
||||
current_state.ApplyFramebufferState();
|
||||
|
||||
if (!is_cache_miss)
|
||||
return;
|
||||
|
||||
if (fbkey.is_single_buffer) {
|
||||
if (fbkey.color_attachments[0] != GL_NONE) {
|
||||
glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
|
||||
0);
|
||||
}
|
||||
glDrawBuffer(fbkey.color_attachments[0]);
|
||||
} else {
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (fbkey.colors[index]) {
|
||||
glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
|
||||
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
|
||||
fbkey.colors[index], 0);
|
||||
}
|
||||
}
|
||||
glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
|
||||
}
|
||||
|
||||
if (fbkey.zeta) {
|
||||
GLenum zeta_attachment =
|
||||
fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
|
||||
glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
|
||||
|
@ -478,9 +418,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
|||
}
|
||||
current_framebuffer_config_state = fb_config_state;
|
||||
|
||||
Surface depth_surface;
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
|
||||
View depth_surface{};
|
||||
if (using_depth_fb) {
|
||||
depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
|
||||
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
|
||||
} else {
|
||||
texture_cache.SetEmptyDepthBuffer();
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
|
||||
|
@ -493,13 +437,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
|||
if (using_color_fb) {
|
||||
if (single_color_target) {
|
||||
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
|
||||
Surface color_surface =
|
||||
res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
|
||||
View color_surface{
|
||||
texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)};
|
||||
|
||||
if (color_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even if
|
||||
// the shader doesn't actually write to it.
|
||||
color_surface->MarkAsModified(true, res_cache);
|
||||
texture_cache.MarkColorBufferInUse(*single_color_target);
|
||||
// Workaround for and issue in nvidia drivers
|
||||
// https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
|
||||
state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
|
||||
|
@ -508,16 +452,21 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
|||
fbkey.is_single_buffer = true;
|
||||
fbkey.color_attachments[0] =
|
||||
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
|
||||
fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0;
|
||||
fbkey.colors[0] = color_surface;
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (index != *single_color_target) {
|
||||
texture_cache.SetEmptyColorBuffer(index);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Multiple color attachments are enabled
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
|
||||
View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)};
|
||||
|
||||
if (color_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even
|
||||
// if the shader doesn't actually write to it.
|
||||
color_surface->MarkAsModified(true, res_cache);
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
// Enable sRGB only for supported formats
|
||||
// Workaround for and issue in nvidia drivers
|
||||
// https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
|
||||
|
@ -527,8 +476,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
|||
|
||||
fbkey.color_attachments[index] =
|
||||
GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
|
||||
fbkey.colors[index] =
|
||||
color_surface != nullptr ? color_surface->Texture().handle : 0;
|
||||
fbkey.colors[index] = color_surface;
|
||||
}
|
||||
fbkey.is_single_buffer = false;
|
||||
fbkey.colors_count = regs.rt_control.count;
|
||||
|
@ -541,14 +489,16 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
|||
if (depth_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even if
|
||||
// the shader doesn't actually write to it.
|
||||
depth_surface->MarkAsModified(true, res_cache);
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
|
||||
fbkey.zeta = depth_surface->Texture().handle;
|
||||
fbkey.zeta = depth_surface;
|
||||
fbkey.stencil_enable = regs.stencil_enable &&
|
||||
depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
|
||||
}
|
||||
|
||||
SetupCachedFramebuffer(fbkey, current_state);
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
|
||||
SyncViewport(current_state);
|
||||
|
||||
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
|
||||
|
@ -630,6 +580,7 @@ void RasterizerOpenGL::Clear() {
|
|||
clear_state.ApplyDepth();
|
||||
clear_state.ApplyStencilTest();
|
||||
clear_state.ApplyViewport();
|
||||
clear_state.ApplyFramebufferState();
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
|
@ -652,7 +603,6 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
ConfigureFramebuffers(state);
|
||||
SyncColorMask();
|
||||
SyncFragmentColorClampState();
|
||||
SyncMultiSampleState();
|
||||
|
@ -697,16 +647,22 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
SetupVertexBuffer(vao);
|
||||
|
||||
DrawParameters params = SetupDraw();
|
||||
texture_cache.GuardSamplers(true);
|
||||
SetupShaders(params.primitive_mode);
|
||||
texture_cache.GuardSamplers(false);
|
||||
|
||||
ConfigureFramebuffers(state);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
|
||||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
res_cache.SignalPreDrawCall();
|
||||
if (texture_cache.TextureBarrier()) {
|
||||
glTextureBarrier();
|
||||
}
|
||||
|
||||
params.DispatchDraw();
|
||||
res_cache.SignalPostDrawCall();
|
||||
|
||||
accelerate_draw = AccelDraw::Disabled;
|
||||
}
|
||||
|
@ -718,7 +674,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
|||
if (!addr || !size) {
|
||||
return;
|
||||
}
|
||||
res_cache.FlushRegion(addr, size);
|
||||
texture_cache.FlushRegion(addr, size);
|
||||
global_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
|
@ -727,23 +683,24 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
|||
if (!addr || !size) {
|
||||
return;
|
||||
}
|
||||
res_cache.InvalidateRegion(addr, size);
|
||||
texture_cache.InvalidateRegion(addr, size);
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
global_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
FlushRegion(addr, size);
|
||||
if (Settings::values.use_accurate_gpu_emulation) {
|
||||
FlushRegion(addr, size);
|
||||
}
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect) {
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
|
||||
texture_cache.DoFermiCopy(src, dst, copy_config);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -755,7 +712,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
|
||||
const auto surface{
|
||||
texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
|
||||
if (!surface) {
|
||||
return {};
|
||||
}
|
||||
|
@ -771,7 +729,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
|
||||
}
|
||||
|
||||
screen_info.display_texture = surface->Texture().handle;
|
||||
screen_info.display_texture = surface->GetTexture();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -837,8 +795,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
|
||||
BaseBindings base_bindings) {
|
||||
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
|
||||
BaseBindings base_bindings) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& gpu = system.GPU();
|
||||
const auto& maxwell3d = gpu.Maxwell3D();
|
||||
|
@ -847,6 +805,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
|
|||
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
|
||||
"Exceeded the number of active textures.");
|
||||
|
||||
TextureBufferUsage texture_buffer_usage{0};
|
||||
|
||||
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
|
||||
const auto& entry = entries[bindpoint];
|
||||
Tegra::Texture::FullTextureInfo texture;
|
||||
|
@ -860,18 +820,26 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
|
|||
}
|
||||
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
|
||||
|
||||
state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
|
||||
auto& unit{state.texture_units[current_bindpoint]};
|
||||
unit.sampler = sampler_cache.GetSampler(texture.tsc);
|
||||
|
||||
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
|
||||
state.texture_units[current_bindpoint].texture =
|
||||
surface->Texture(entry.IsArray()).handle;
|
||||
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
|
||||
if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) {
|
||||
if (view->GetSurfaceParams().IsBuffer()) {
|
||||
// Record that this texture is a texture buffer.
|
||||
texture_buffer_usage.set(bindpoint);
|
||||
} else {
|
||||
// Apply swizzle to textures that are not buffers.
|
||||
view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
|
||||
texture.tic.w_source);
|
||||
}
|
||||
state.texture_units[current_bindpoint].texture = view->GetTexture();
|
||||
} else {
|
||||
// Can occur when texture addr is null or its memory is unmapped/invalid
|
||||
state.texture_units[current_bindpoint].texture = 0;
|
||||
unit.texture = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return texture_buffer_usage;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
|
||||
|
|
|
@ -23,14 +23,15 @@
|
|||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_global_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
|
||||
namespace Core {
|
||||
|
@ -41,11 +42,14 @@ namespace Core::Frontend {
|
|||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
struct DrawParameters;
|
||||
struct FramebufferCacheKey;
|
||||
|
||||
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
|
||||
public:
|
||||
|
@ -61,8 +65,7 @@ public:
|
|||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect) override;
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||
|
@ -95,6 +98,8 @@ private:
|
|||
|
||||
/**
|
||||
* Configures the color and depth framebuffer states.
|
||||
* @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure
|
||||
* again. Used by the texture cache to solve texception conflicts
|
||||
* @param use_color_fb If true, configure color framebuffers.
|
||||
* @param using_depth_fb If true, configure the depth/stencil framebuffer.
|
||||
* @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
|
||||
|
@ -118,9 +123,10 @@ private:
|
|||
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
|
||||
BaseBindings base_bindings);
|
||||
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
|
||||
/// usage.
|
||||
TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader, BaseBindings base_bindings);
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport(OpenGLState& current_state);
|
||||
|
@ -181,10 +187,11 @@ private:
|
|||
const Device device;
|
||||
OpenGLState state;
|
||||
|
||||
RasterizerCacheOpenGL res_cache;
|
||||
TextureCacheOpenGL texture_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
GlobalRegionCacheOpenGL global_cache;
|
||||
SamplerCacheOpenGL sampler_cache;
|
||||
FramebufferCacheOpenGL framebuffer_cache;
|
||||
|
||||
Core::System& system;
|
||||
ScreenInfo& screen_info;
|
||||
|
@ -195,7 +202,6 @@ private:
|
|||
OGLVertexArray>
|
||||
vertex_array_cache;
|
||||
|
||||
std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
|
||||
FramebufferConfigState current_framebuffer_config_state;
|
||||
std::pair<bool, bool> current_depth_stencil_usage{};
|
||||
|
||||
|
@ -218,8 +224,6 @@ private:
|
|||
|
||||
void SetupShaders(GLenum primitive_mode);
|
||||
|
||||
void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state);
|
||||
|
||||
enum class AccelDraw { Disabled, Arrays, Indexed };
|
||||
AccelDraw accelerate_draw = AccelDraw::Disabled;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,572 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class CachedSurface;
|
||||
using Surface = std::shared_ptr<CachedSurface>;
|
||||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
|
||||
|
||||
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
|
||||
using SurfaceType = VideoCore::Surface::SurfaceType;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
using ComponentType = VideoCore::Surface::ComponentType;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct SurfaceParams {
|
||||
enum class SurfaceClass {
|
||||
Uploaded,
|
||||
RenderTarget,
|
||||
DepthBuffer,
|
||||
Copy,
|
||||
};
|
||||
|
||||
static std::string SurfaceTargetName(SurfaceTarget target) {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
return "Texture1D";
|
||||
case SurfaceTarget::Texture2D:
|
||||
return "Texture2D";
|
||||
case SurfaceTarget::Texture3D:
|
||||
return "Texture3D";
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
return "Texture1DArray";
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
return "Texture2DArray";
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
return "TextureCubemap";
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
return "TextureCubeArray";
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
|
||||
UNREACHABLE();
|
||||
return fmt::format("TextureUnknown({})", static_cast<u32>(target));
|
||||
}
|
||||
}
|
||||
|
||||
u32 GetFormatBpp() const {
|
||||
return VideoCore::Surface::GetFormatBpp(pixel_format);
|
||||
}
|
||||
|
||||
/// Returns the rectangle corresponding to this surface
|
||||
Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
|
||||
|
||||
/// Returns the total size of this surface in bytes, adjusted for compression
|
||||
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
|
||||
const u32 compression_factor{GetCompressionFactor(pixel_format)};
|
||||
const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
|
||||
const size_t uncompressed_size{
|
||||
Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
|
||||
height, depth, block_height, block_depth)};
|
||||
|
||||
// Divide by compression_factor^2, as height and width are factored by this
|
||||
return uncompressed_size / (compression_factor * compression_factor);
|
||||
}
|
||||
|
||||
/// Returns the size of this surface as an OpenGL texture in bytes
|
||||
std::size_t SizeInBytesGL() const {
|
||||
return SizeInBytesRaw(true);
|
||||
}
|
||||
|
||||
/// Returns the size of this surface as a cube face in bytes
|
||||
std::size_t SizeInBytesCubeFace() const {
|
||||
return size_in_bytes / 6;
|
||||
}
|
||||
|
||||
/// Returns the size of this surface as an OpenGL cube face in bytes
|
||||
std::size_t SizeInBytesCubeFaceGL() const {
|
||||
return size_in_bytes_gl / 6;
|
||||
}
|
||||
|
||||
/// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
|
||||
std::size_t MemorySize() const {
|
||||
std::size_t size = InnerMemorySize(false, is_layered);
|
||||
if (is_layered)
|
||||
return size * depth;
|
||||
return size;
|
||||
}
|
||||
|
||||
/// Returns true if the parameters constitute a valid rasterizer surface.
|
||||
bool IsValid() const {
|
||||
return gpu_addr && host_ptr && height && width;
|
||||
}
|
||||
|
||||
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
|
||||
/// mipmaps.
|
||||
std::size_t LayerMemorySize() const {
|
||||
return InnerMemorySize(false, true);
|
||||
}
|
||||
|
||||
/// Returns the size of a layer of this surface in OpenGL.
|
||||
std::size_t LayerSizeGL(u32 mip_level) const {
|
||||
return InnerMipmapMemorySize(mip_level, true, is_layered, false);
|
||||
}
|
||||
|
||||
std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
|
||||
std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
|
||||
if (is_layered)
|
||||
return size * depth;
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t GetMipmapLevelOffset(u32 mip_level) const {
|
||||
std::size_t offset = 0;
|
||||
for (u32 i = 0; i < mip_level; i++)
|
||||
offset += InnerMipmapMemorySize(i, false, is_layered);
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
|
||||
std::size_t offset = 0;
|
||||
for (u32 i = 0; i < mip_level; i++)
|
||||
offset += InnerMipmapMemorySize(i, true, is_layered);
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetMipmapSingleSize(u32 mip_level) const {
|
||||
return InnerMipmapMemorySize(mip_level, false, is_layered);
|
||||
}
|
||||
|
||||
u32 MipWidth(u32 mip_level) const {
|
||||
return std::max(1U, width >> mip_level);
|
||||
}
|
||||
|
||||
u32 MipWidthGobAligned(u32 mip_level) const {
|
||||
return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
|
||||
}
|
||||
|
||||
u32 MipHeight(u32 mip_level) const {
|
||||
return std::max(1U, height >> mip_level);
|
||||
}
|
||||
|
||||
u32 MipDepth(u32 mip_level) const {
|
||||
return is_layered ? depth : std::max(1U, depth >> mip_level);
|
||||
}
|
||||
|
||||
// Auto block resizing algorithm from:
|
||||
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
|
||||
u32 MipBlockHeight(u32 mip_level) const {
|
||||
if (mip_level == 0)
|
||||
return block_height;
|
||||
u32 alt_height = MipHeight(mip_level);
|
||||
u32 h = GetDefaultBlockHeight(pixel_format);
|
||||
u32 blocks_in_y = (alt_height + h - 1) / h;
|
||||
u32 bh = 16;
|
||||
while (bh > 1 && blocks_in_y <= bh * 4) {
|
||||
bh >>= 1;
|
||||
}
|
||||
return bh;
|
||||
}
|
||||
|
||||
u32 MipBlockDepth(u32 mip_level) const {
|
||||
if (mip_level == 0) {
|
||||
return block_depth;
|
||||
}
|
||||
|
||||
if (is_layered) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
const u32 mip_depth = MipDepth(mip_level);
|
||||
u32 bd = 32;
|
||||
while (bd > 1 && mip_depth * 2 <= bd) {
|
||||
bd >>= 1;
|
||||
}
|
||||
|
||||
if (bd == 32) {
|
||||
const u32 bh = MipBlockHeight(mip_level);
|
||||
if (bh >= 4) {
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
|
||||
return bd;
|
||||
}
|
||||
|
||||
u32 RowAlign(u32 mip_level) const {
|
||||
const u32 m_width = MipWidth(mip_level);
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
|
||||
const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
|
||||
return (1U << l2);
|
||||
}
|
||||
|
||||
/// Creates SurfaceParams from a texture configuration
|
||||
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
|
||||
const GLShader::SamplerEntry& entry);
|
||||
|
||||
/// Creates SurfaceParams from a framebuffer configuration
|
||||
static SurfaceParams CreateForFramebuffer(std::size_t index);
|
||||
|
||||
/// Creates SurfaceParams for a depth buffer configuration
|
||||
static SurfaceParams CreateForDepthBuffer(
|
||||
u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
|
||||
u32 block_width, u32 block_height, u32 block_depth,
|
||||
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
|
||||
|
||||
/// Creates SurfaceParams for a Fermi2D surface copy
|
||||
static SurfaceParams CreateForFermiCopySurface(
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& config);
|
||||
|
||||
/// Checks if surfaces are compatible for caching
|
||||
bool IsCompatibleSurface(const SurfaceParams& other) const {
|
||||
if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
|
||||
std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
|
||||
other.depth, other.is_tiled)) {
|
||||
if (!is_tiled)
|
||||
return true;
|
||||
return std::tie(block_height, block_depth, tile_width_spacing) ==
|
||||
std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Initializes parameters for caching, should be called after everything has been initialized
|
||||
void InitCacheParameters(GPUVAddr gpu_addr);
|
||||
|
||||
std::string TargetName() const {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
return "1D";
|
||||
case SurfaceTarget::Texture2D:
|
||||
return "2D";
|
||||
case SurfaceTarget::Texture3D:
|
||||
return "3D";
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
return "1DArray";
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
return "2DArray";
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
return "Cube";
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
|
||||
UNREACHABLE();
|
||||
return fmt::format("TUK({})", static_cast<u32>(target));
|
||||
}
|
||||
}
|
||||
|
||||
std::string ClassName() const {
|
||||
switch (identity) {
|
||||
case SurfaceClass::Uploaded:
|
||||
return "UP";
|
||||
case SurfaceClass::RenderTarget:
|
||||
return "RT";
|
||||
case SurfaceClass::DepthBuffer:
|
||||
return "DB";
|
||||
case SurfaceClass::Copy:
|
||||
return "CP";
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
|
||||
UNREACHABLE();
|
||||
return fmt::format("CUK({})", static_cast<u32>(identity));
|
||||
}
|
||||
}
|
||||
|
||||
std::string IdentityString() const {
|
||||
return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
|
||||
}
|
||||
|
||||
bool is_tiled;
|
||||
u32 block_width;
|
||||
u32 block_height;
|
||||
u32 block_depth;
|
||||
u32 tile_width_spacing;
|
||||
PixelFormat pixel_format;
|
||||
ComponentType component_type;
|
||||
SurfaceType type;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 unaligned_height;
|
||||
u32 pitch;
|
||||
SurfaceTarget target;
|
||||
SurfaceClass identity;
|
||||
u32 max_mip_level;
|
||||
bool is_layered;
|
||||
bool is_array;
|
||||
bool srgb_conversion;
|
||||
// Parameters used for caching
|
||||
u8* host_ptr;
|
||||
GPUVAddr gpu_addr;
|
||||
std::size_t size_in_bytes;
|
||||
std::size_t size_in_bytes_gl;
|
||||
|
||||
// Render target specific parameters, not used in caching
|
||||
struct {
|
||||
u32 index;
|
||||
u32 array_mode;
|
||||
u32 volume;
|
||||
u32 layer_stride;
|
||||
u32 base_layer;
|
||||
} rt;
|
||||
|
||||
private:
|
||||
std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
|
||||
bool uncompressed = false) const;
|
||||
std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
|
||||
bool uncompressed = false) const;
|
||||
};
|
||||
|
||||
}; // namespace OpenGL
|
||||
|
||||
/// Hashable variation of SurfaceParams, used for a key in the surface cache
|
||||
struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
|
||||
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
|
||||
SurfaceReserveKey res;
|
||||
res.state = params;
|
||||
res.state.identity = {}; // Ignore the origin of the texture
|
||||
res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
|
||||
res.state.rt = {}; // Ignore rt config in caching
|
||||
return res;
|
||||
}
|
||||
};
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<SurfaceReserveKey> {
|
||||
std::size_t operator()(const SurfaceReserveKey& k) const {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class RasterizerOpenGL;
|
||||
|
||||
// This is used to store temporary big buffers,
|
||||
// instead of creating/destroying all the time
|
||||
struct RasterizerTemporaryMemory {
|
||||
std::vector<std::vector<u8>> gl_buffer;
|
||||
};
|
||||
|
||||
class CachedSurface final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedSurface(const SurfaceParams& params);
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return cached_size_in_bytes;
|
||||
}
|
||||
|
||||
std::size_t GetMemorySize() const {
|
||||
return memory_size;
|
||||
}
|
||||
|
||||
const OGLTexture& Texture() const {
|
||||
return texture;
|
||||
}
|
||||
|
||||
const OGLTexture& Texture(bool as_array) {
|
||||
if (params.is_array == as_array) {
|
||||
return texture;
|
||||
} else {
|
||||
EnsureTextureDiscrepantView();
|
||||
return discrepant_view;
|
||||
}
|
||||
}
|
||||
|
||||
GLenum Target() const {
|
||||
return gl_target;
|
||||
}
|
||||
|
||||
const SurfaceParams& GetSurfaceParams() const {
|
||||
return params;
|
||||
}
|
||||
|
||||
// Read/Write data in Switch memory to/from gl_buffer
|
||||
void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
|
||||
void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
|
||||
|
||||
// Upload data in gl_buffer to this surface's texture
|
||||
void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
|
||||
GLuint draw_fb_handle);
|
||||
|
||||
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
|
||||
Tegra::Texture::SwizzleSource swizzle_y,
|
||||
Tegra::Texture::SwizzleSource swizzle_z,
|
||||
Tegra::Texture::SwizzleSource swizzle_w);
|
||||
|
||||
void MarkReinterpreted() {
|
||||
reinterpreted = true;
|
||||
}
|
||||
|
||||
bool IsReinterpreted() const {
|
||||
return reinterpreted;
|
||||
}
|
||||
|
||||
void MarkForReload(bool reload) {
|
||||
must_reload = reload;
|
||||
}
|
||||
|
||||
bool MustReload() const {
|
||||
return must_reload;
|
||||
}
|
||||
|
||||
bool IsUploaded() const {
|
||||
return params.identity == SurfaceParams::SurfaceClass::Uploaded;
|
||||
}
|
||||
|
||||
private:
|
||||
void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
|
||||
GLuint read_fb_handle, GLuint draw_fb_handle);
|
||||
|
||||
void EnsureTextureDiscrepantView();
|
||||
|
||||
OGLTexture texture;
|
||||
OGLTexture discrepant_view;
|
||||
SurfaceParams params{};
|
||||
GLenum gl_target{};
|
||||
GLenum gl_internal_format{};
|
||||
std::size_t cached_size_in_bytes{};
|
||||
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
|
||||
std::size_t memory_size;
|
||||
bool reinterpreted = false;
|
||||
bool must_reload = false;
|
||||
VAddr cpu_addr{};
|
||||
};
|
||||
|
||||
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
|
||||
public:
|
||||
explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
|
||||
|
||||
/// Get a surface based on the texture configuration
|
||||
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
|
||||
const GLShader::SamplerEntry& entry);
|
||||
|
||||
/// Get the depth surface based on the framebuffer configuration
|
||||
Surface GetDepthBufferSurface(bool preserve_contents);
|
||||
|
||||
/// Get the color surface based on the framebuffer configuration and the specified render target
|
||||
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
|
||||
|
||||
/// Tries to find a framebuffer using on the provided CPU address
|
||||
Surface TryFindFramebufferSurface(const u8* host_ptr) const;
|
||||
|
||||
/// Copies the contents of one surface to another
|
||||
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect);
|
||||
|
||||
void SignalPreDrawCall();
|
||||
void SignalPostDrawCall();
|
||||
|
||||
protected:
|
||||
void FlushObjectInner(const Surface& object) override {
|
||||
object->FlushGLBuffer(temporal_memory);
|
||||
}
|
||||
|
||||
private:
|
||||
void LoadSurface(const Surface& surface);
|
||||
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
|
||||
|
||||
/// Gets an uncached surface, creating it if need be
|
||||
Surface GetUncachedSurface(const SurfaceParams& params);
|
||||
|
||||
/// Recreates a surface with new parameters
|
||||
Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
|
||||
|
||||
/// Reserves a unique surface that can be reused later
|
||||
void ReserveSurface(const Surface& surface);
|
||||
|
||||
/// Tries to get a reserved surface for the specified parameters
|
||||
Surface TryGetReservedSurface(const SurfaceParams& params);
|
||||
|
||||
// Partialy reinterpret a surface based on a triggering_surface that collides with it.
|
||||
// returns true if the reinterpret was successful, false in case it was not.
|
||||
bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
|
||||
|
||||
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
|
||||
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
|
||||
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
|
||||
void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
|
||||
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
|
||||
const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
|
||||
const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
|
||||
|
||||
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
|
||||
/// previously been used. This is to prevent surfaces from being constantly created and
|
||||
/// destroyed when used with different surface parameters.
|
||||
std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;
|
||||
|
||||
OGLFramebuffer read_framebuffer;
|
||||
OGLFramebuffer draw_framebuffer;
|
||||
|
||||
bool texception = false;
|
||||
|
||||
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
|
||||
/// using the new format.
|
||||
OGLBuffer copy_pbo;
|
||||
|
||||
std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
|
||||
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
|
||||
Surface last_depth_buffer;
|
||||
|
||||
RasterizerTemporaryMemory temporal_memory;
|
||||
|
||||
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
|
||||
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
|
||||
|
||||
static auto GetReinterpretInterval(const Surface& object) {
|
||||
return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
|
||||
object->GetCacheAddr() + object->GetMemorySize() - 1);
|
||||
}
|
||||
|
||||
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
|
||||
SurfaceIntervalCache reinterpreted_surfaces;
|
||||
|
||||
void RegisterReinterpretSurface(Surface reinterpret_surface) {
|
||||
auto interval = GetReinterpretInterval(reinterpret_surface);
|
||||
reinterpreted_surfaces.insert({interval, reinterpret_surface});
|
||||
reinterpret_surface->MarkReinterpreted();
|
||||
}
|
||||
|
||||
Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
|
||||
const SurfaceInterval interval{addr};
|
||||
for (auto& pair :
|
||||
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
|
||||
return pair.second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Register(const Surface& object) override {
|
||||
RasterizerCache<Surface>::Register(object);
|
||||
}
|
||||
|
||||
/// Unregisters an object from the cache
|
||||
void Unregister(const Surface& object) override {
|
||||
if (object->IsReinterpreted()) {
|
||||
auto interval = GetReinterpretInterval(object);
|
||||
reinterpreted_surfaces.erase(interval);
|
||||
}
|
||||
RasterizerCache<Surface>::Unregister(object);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -33,6 +33,24 @@ void OGLTexture::Release() {
|
|||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLTextureView::Create() {
|
||||
if (handle != 0)
|
||||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
glGenTextures(1, &handle);
|
||||
}
|
||||
|
||||
void OGLTextureView::Release() {
|
||||
if (handle == 0)
|
||||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
|
||||
glDeleteTextures(1, &handle);
|
||||
OpenGLState::GetCurState().UnbindTexture(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLSampler::Create() {
|
||||
if (handle != 0)
|
||||
return;
|
||||
|
@ -130,6 +148,12 @@ void OGLBuffer::Release() {
|
|||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
|
||||
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
|
||||
|
||||
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
void OGLSync::Create() {
|
||||
if (handle != 0)
|
||||
return;
|
||||
|
|
|
@ -36,6 +36,31 @@ public:
|
|||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLTextureView : private NonCopyable {
|
||||
public:
|
||||
OGLTextureView() = default;
|
||||
|
||||
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLTextureView() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLTextureView& operator=(OGLTextureView&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLSampler : private NonCopyable {
|
||||
public:
|
||||
OGLSampler() = default;
|
||||
|
@ -161,6 +186,9 @@ public:
|
|||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
// Converts the buffer into a stream copy buffer with a fixed size
|
||||
void MakeStreamCopy(std::size_t buffer_size);
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
|
|
|
@ -103,15 +103,22 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
|
|||
/// Calculates the size of a program stream
|
||||
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
|
||||
constexpr std::size_t start_offset = 10;
|
||||
// This is the encoded version of BRA that jumps to itself. All Nvidia
|
||||
// shaders end with one.
|
||||
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
|
||||
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
|
||||
std::size_t offset = start_offset;
|
||||
std::size_t size = start_offset * sizeof(u64);
|
||||
while (offset < program.size()) {
|
||||
const u64 instruction = program[offset];
|
||||
if (!IsSchedInstruction(offset, start_offset)) {
|
||||
if (instruction == 0 || (instruction >> 52) == 0x50b) {
|
||||
if ((instruction & mask) == self_jumping_branch) {
|
||||
// End on Maxwell's "nop" instruction
|
||||
break;
|
||||
}
|
||||
if (instruction == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
size += sizeof(u64);
|
||||
offset++;
|
||||
|
@ -168,8 +175,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
|
|||
}
|
||||
|
||||
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
|
||||
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
|
||||
GLenum primitive_mode, bool hint_retrievable = false) {
|
||||
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
|
||||
bool hint_retrievable = false) {
|
||||
auto base_bindings{variant.base_bindings};
|
||||
const auto primitive_mode{variant.primitive_mode};
|
||||
const auto texture_buffer_usage{variant.texture_buffer_usage};
|
||||
|
||||
std::string source = "#version 430 core\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
|
||||
|
@ -186,6 +197,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
|
|||
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
|
||||
base_bindings.sampler++);
|
||||
}
|
||||
for (const auto& image : entries.images) {
|
||||
source +=
|
||||
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
|
||||
}
|
||||
|
||||
// Transform 1D textures to texture samplers by declaring its preprocessor macros.
|
||||
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
|
||||
if (!texture_buffer_usage.test(i)) {
|
||||
continue;
|
||||
}
|
||||
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
|
||||
}
|
||||
|
||||
if (program_type == Maxwell::ShaderProgram::Geometry) {
|
||||
const auto [glsl_topology, debug_name, max_vertices] =
|
||||
|
@ -254,20 +277,18 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
|
|||
return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
|
||||
}
|
||||
|
||||
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
|
||||
BaseBindings base_bindings) {
|
||||
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
|
||||
GLuint handle{};
|
||||
if (program_type == Maxwell::ShaderProgram::Geometry) {
|
||||
handle = GetGeometryShader(primitive_mode, base_bindings);
|
||||
handle = GetGeometryShader(variant);
|
||||
} else {
|
||||
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
|
||||
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
|
||||
auto& program = entry->second;
|
||||
if (is_cache_miss) {
|
||||
program = TryLoadProgram(primitive_mode, base_bindings);
|
||||
program = TryLoadProgram(variant);
|
||||
if (!program) {
|
||||
program =
|
||||
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
|
||||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||
program = SpecializeShader(code, entries, program_type, variant);
|
||||
disk_cache.SaveUsage(GetUsage(variant));
|
||||
}
|
||||
|
||||
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
|
||||
|
@ -276,6 +297,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
|
|||
handle = program->handle;
|
||||
}
|
||||
|
||||
auto base_bindings{variant.base_bindings};
|
||||
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
|
||||
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
|
||||
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
|
||||
|
@ -283,43 +305,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
|
|||
return {handle, base_bindings};
|
||||
}
|
||||
|
||||
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
|
||||
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
|
||||
GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
|
||||
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
|
||||
auto& programs = entry->second;
|
||||
|
||||
switch (primitive_mode) {
|
||||
switch (variant.primitive_mode) {
|
||||
case GL_POINTS:
|
||||
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.points, variant);
|
||||
case GL_LINES:
|
||||
case GL_LINE_STRIP:
|
||||
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.lines, variant);
|
||||
case GL_LINES_ADJACENCY:
|
||||
case GL_LINE_STRIP_ADJACENCY:
|
||||
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.lines_adjacency, variant);
|
||||
case GL_TRIANGLES:
|
||||
case GL_TRIANGLE_STRIP:
|
||||
case GL_TRIANGLE_FAN:
|
||||
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.triangles, variant);
|
||||
case GL_TRIANGLES_ADJACENCY:
|
||||
case GL_TRIANGLE_STRIP_ADJACENCY:
|
||||
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.triangles_adjacency, variant);
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown primitive mode.");
|
||||
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.points, variant);
|
||||
}
|
||||
}
|
||||
|
||||
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
|
||||
GLenum primitive_mode) {
|
||||
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
|
||||
const ProgramVariant& variant) {
|
||||
if (target_program) {
|
||||
return target_program->handle;
|
||||
}
|
||||
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
|
||||
target_program = TryLoadProgram(primitive_mode, base_bindings);
|
||||
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
|
||||
target_program = TryLoadProgram(variant);
|
||||
if (!target_program) {
|
||||
target_program =
|
||||
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
|
||||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||
target_program = SpecializeShader(code, entries, program_type, variant);
|
||||
disk_cache.SaveUsage(GetUsage(variant));
|
||||
}
|
||||
|
||||
LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
|
||||
|
@ -327,18 +348,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
|
|||
return target_program->handle;
|
||||
};
|
||||
|
||||
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
|
||||
BaseBindings base_bindings) const {
|
||||
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
|
||||
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
|
||||
const auto found = precompiled_programs.find(GetUsage(variant));
|
||||
if (found == precompiled_programs.end()) {
|
||||
return {};
|
||||
}
|
||||
return found->second;
|
||||
}
|
||||
|
||||
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
|
||||
BaseBindings base_bindings) const {
|
||||
return {unique_identifier, base_bindings, primitive_mode};
|
||||
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
|
||||
ShaderDiskCacheUsage usage;
|
||||
usage.unique_identifier = unique_identifier;
|
||||
usage.variant = variant;
|
||||
return usage;
|
||||
}
|
||||
|
||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
|
@ -404,8 +426,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
|||
}
|
||||
if (!shader) {
|
||||
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
|
||||
unspecialized.program_type, usage.bindings,
|
||||
usage.primitive, true);
|
||||
unspecialized.program_type, usage.variant, true);
|
||||
}
|
||||
|
||||
std::scoped_lock lock(mutex);
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
|
@ -74,8 +75,7 @@ public:
|
|||
}
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
|
||||
BaseBindings base_bindings);
|
||||
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
|
||||
|
||||
private:
|
||||
explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
|
||||
|
@ -92,15 +92,14 @@ private:
|
|||
CachedProgram triangles_adjacency;
|
||||
};
|
||||
|
||||
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
|
||||
GLuint GetGeometryShader(const ProgramVariant& variant);
|
||||
|
||||
/// Generates a geometry shader or returns one that already exists.
|
||||
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
|
||||
GLenum primitive_mode);
|
||||
GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);
|
||||
|
||||
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
|
||||
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
|
||||
|
||||
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
|
||||
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
|
||||
|
||||
u8* host_ptr{};
|
||||
VAddr cpu_addr{};
|
||||
|
@ -113,8 +112,8 @@ private:
|
|||
std::string code;
|
||||
std::size_t shader_length{};
|
||||
|
||||
std::unordered_map<BaseBindings, CachedProgram> programs;
|
||||
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
|
||||
std::unordered_map<ProgramVariant, CachedProgram> programs;
|
||||
std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;
|
||||
|
||||
std::unordered_map<u32, GLuint> cbuf_resource_cache;
|
||||
std::unordered_map<u32, GLuint> gmem_resource_cache;
|
||||
|
|
|
@ -180,6 +180,7 @@ public:
|
|||
DeclareGlobalMemory();
|
||||
DeclareSamplers();
|
||||
DeclarePhysicalAttributeReader();
|
||||
DeclareImages();
|
||||
|
||||
code.AddLine("void execute_{}() {{", suffix);
|
||||
++code.scope;
|
||||
|
@ -234,6 +235,9 @@ public:
|
|||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
entries.samplers.emplace_back(sampler);
|
||||
}
|
||||
for (const auto& image : ir.GetImages()) {
|
||||
entries.images.emplace_back(image);
|
||||
}
|
||||
for (const auto& gmem_pair : ir.GetGlobalMemory()) {
|
||||
const auto& [base, usage] = gmem_pair;
|
||||
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
|
||||
|
@ -453,9 +457,13 @@ private:
|
|||
void DeclareSamplers() {
|
||||
const auto& samplers = ir.GetSamplers();
|
||||
for (const auto& sampler : samplers) {
|
||||
std::string sampler_type = [&sampler] {
|
||||
const std::string name{GetSampler(sampler)};
|
||||
const std::string description{"layout (binding = SAMPLER_BINDING_" +
|
||||
std::to_string(sampler.GetIndex()) + ") uniform"};
|
||||
std::string sampler_type = [&]() {
|
||||
switch (sampler.GetType()) {
|
||||
case Tegra::Shader::TextureType::Texture1D:
|
||||
// Special cased, read below.
|
||||
return "sampler1D";
|
||||
case Tegra::Shader::TextureType::Texture2D:
|
||||
return "sampler2D";
|
||||
|
@ -475,8 +483,19 @@ private:
|
|||
sampler_type += "Shadow";
|
||||
}
|
||||
|
||||
code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(),
|
||||
sampler_type, GetSampler(sampler));
|
||||
if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) {
|
||||
// 1D textures can be aliased to texture buffers, hide the declarations behind a
|
||||
// preprocessor flag and use one or the other from the GPU state. This has to be
|
||||
// done because shaders don't have enough information to determine the texture type.
|
||||
EmitIfdefIsBuffer(sampler);
|
||||
code.AddLine("{} samplerBuffer {};", description, name);
|
||||
code.AddLine("#else");
|
||||
code.AddLine("{} {} {};", description, sampler_type, name);
|
||||
code.AddLine("#endif");
|
||||
} else {
|
||||
// The other texture types (2D, 3D and cubes) don't have this issue.
|
||||
code.AddLine("{} {} {};", description, sampler_type, name);
|
||||
}
|
||||
}
|
||||
if (!samplers.empty()) {
|
||||
code.AddNewLine();
|
||||
|
@ -516,6 +535,37 @@ private:
|
|||
code.AddNewLine();
|
||||
}
|
||||
|
||||
void DeclareImages() {
|
||||
const auto& images{ir.GetImages()};
|
||||
for (const auto& image : images) {
|
||||
const std::string image_type = [&]() {
|
||||
switch (image.GetType()) {
|
||||
case Tegra::Shader::ImageType::Texture1D:
|
||||
return "image1D";
|
||||
case Tegra::Shader::ImageType::TextureBuffer:
|
||||
return "bufferImage";
|
||||
case Tegra::Shader::ImageType::Texture1DArray:
|
||||
return "image1DArray";
|
||||
case Tegra::Shader::ImageType::Texture2D:
|
||||
return "image2D";
|
||||
case Tegra::Shader::ImageType::Texture2DArray:
|
||||
return "image2DArray";
|
||||
case Tegra::Shader::ImageType::Texture3D:
|
||||
return "image3D";
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "image1D";
|
||||
}
|
||||
}();
|
||||
code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform "
|
||||
"{} {};",
|
||||
image.GetIndex(), image_type, GetImage(image));
|
||||
}
|
||||
if (!images.empty()) {
|
||||
code.AddNewLine();
|
||||
}
|
||||
}
|
||||
|
||||
void VisitBlock(const NodeBlock& bb) {
|
||||
for (const auto& node : bb) {
|
||||
if (const std::string expr = Visit(node); !expr.empty()) {
|
||||
|
@ -1439,13 +1489,61 @@ private:
|
|||
else if (next < count)
|
||||
expr += ", ";
|
||||
}
|
||||
|
||||
// Store a copy of the expression without the lod to be used with texture buffers
|
||||
std::string expr_buffer = expr;
|
||||
|
||||
if (meta->lod) {
|
||||
expr += ", ";
|
||||
expr += CastOperand(Visit(meta->lod), Type::Int);
|
||||
}
|
||||
expr += ')';
|
||||
expr += GetSwizzle(meta->element);
|
||||
|
||||
return expr + GetSwizzle(meta->element);
|
||||
expr_buffer += ')';
|
||||
expr_buffer += GetSwizzle(meta->element);
|
||||
|
||||
const std::string tmp{code.GenerateTemporary()};
|
||||
EmitIfdefIsBuffer(meta->sampler);
|
||||
code.AddLine("float {} = {};", tmp, expr_buffer);
|
||||
code.AddLine("#else");
|
||||
code.AddLine("float {} = {};", tmp, expr);
|
||||
code.AddLine("#endif");
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
std::string ImageStore(Operation operation) {
|
||||
constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
|
||||
const auto meta{std::get<MetaImage>(operation.GetMeta())};
|
||||
|
||||
std::string expr = "imageStore(";
|
||||
expr += GetImage(meta.image);
|
||||
expr += ", ";
|
||||
|
||||
const std::size_t coords_count{operation.GetOperandsCount()};
|
||||
expr += constructors.at(coords_count - 1);
|
||||
for (std::size_t i = 0; i < coords_count; ++i) {
|
||||
expr += VisitOperand(operation, i, Type::Int);
|
||||
if (i + 1 < coords_count) {
|
||||
expr += ", ";
|
||||
}
|
||||
}
|
||||
expr += "), ";
|
||||
|
||||
const std::size_t values_count{meta.values.size()};
|
||||
UNIMPLEMENTED_IF(values_count != 4);
|
||||
expr += "vec4(";
|
||||
for (std::size_t i = 0; i < values_count; ++i) {
|
||||
expr += Visit(meta.values.at(i));
|
||||
if (i + 1 < values_count) {
|
||||
expr += ", ";
|
||||
}
|
||||
}
|
||||
expr += "));";
|
||||
|
||||
code.AddLine(expr);
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string Branch(Operation operation) {
|
||||
|
@ -1688,6 +1786,8 @@ private:
|
|||
&GLSLDecompiler::TextureQueryLod,
|
||||
&GLSLDecompiler::TexelFetch,
|
||||
|
||||
&GLSLDecompiler::ImageStore,
|
||||
|
||||
&GLSLDecompiler::Branch,
|
||||
&GLSLDecompiler::PushFlowStack,
|
||||
&GLSLDecompiler::PopFlowStack,
|
||||
|
@ -1756,6 +1856,14 @@ private:
|
|||
return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
|
||||
}
|
||||
|
||||
std::string GetImage(const Image& image) const {
|
||||
return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
|
||||
}
|
||||
|
||||
void EmitIfdefIsBuffer(const Sampler& sampler) {
|
||||
code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
|
||||
}
|
||||
|
||||
std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
|
||||
return fmt::format("{}_{}_{}", name, index, suffix);
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ struct ShaderEntries;
|
|||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using ProgramResult = std::pair<std::string, ShaderEntries>;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
|
@ -74,6 +75,7 @@ struct ShaderEntries {
|
|||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<SamplerEntry> bindless_samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
|
|
|
@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
|
|||
Dump,
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 1;
|
||||
constexpr u32 NativeVersion = 4;
|
||||
|
||||
// Making sure sizes doesn't change by accident
|
||||
static_assert(sizeof(BaseBindings) == 12);
|
||||
static_assert(sizeof(ShaderDiskCacheUsage) == 24);
|
||||
static_assert(sizeof(BaseBindings) == 16);
|
||||
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -332,11 +332,28 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
|||
static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
|
||||
}
|
||||
|
||||
u32 images_count{};
|
||||
if (!LoadObjectFromPrecompiled(images_count)) {
|
||||
return {};
|
||||
}
|
||||
for (u32 i = 0; i < images_count; ++i) {
|
||||
u64 offset{};
|
||||
u64 index{};
|
||||
u32 type{};
|
||||
u8 is_bindless{};
|
||||
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
|
||||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.images.emplace_back(
|
||||
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
|
||||
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
|
||||
}
|
||||
|
||||
u32 global_memory_count{};
|
||||
if (!LoadObjectFromPrecompiled(global_memory_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < global_memory_count; ++i) {
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
|
@ -360,7 +377,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
|||
if (!LoadObjectFromPrecompiled(shader_length)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
|
||||
|
||||
return entry;
|
||||
|
@ -400,6 +416,18 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
|
|||
}
|
||||
}
|
||||
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& image : entries.images) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
@ -30,22 +31,26 @@ class IOFile;
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
using ProgramCode = std::vector<u64>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ShaderDiskCacheUsage;
|
||||
struct ShaderDiskCacheDump;
|
||||
|
||||
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
|
||||
|
||||
/// Allocated bindings used by an OpenGL shader program
|
||||
using ProgramCode = std::vector<u64>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
using TextureBufferUsage = std::bitset<64>;
|
||||
|
||||
/// Allocated bindings used by an OpenGL shader program.
|
||||
struct BaseBindings {
|
||||
u32 cbuf{};
|
||||
u32 gmem{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
|
||||
bool operator==(const BaseBindings& rhs) const {
|
||||
return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
|
||||
return std::tie(cbuf, gmem, sampler, image) ==
|
||||
std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
|
||||
}
|
||||
|
||||
bool operator!=(const BaseBindings& rhs) const {
|
||||
|
@ -53,15 +58,29 @@ struct BaseBindings {
|
|||
}
|
||||
};
|
||||
|
||||
/// Describes how a shader is used
|
||||
/// Describes the different variants a single program can be compiled.
|
||||
struct ProgramVariant {
|
||||
BaseBindings base_bindings;
|
||||
GLenum primitive_mode{};
|
||||
TextureBufferUsage texture_buffer_usage{};
|
||||
|
||||
bool operator==(const ProgramVariant& rhs) const {
|
||||
return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
|
||||
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
|
||||
}
|
||||
|
||||
bool operator!=(const ProgramVariant& rhs) const {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
|
||||
/// Describes how a shader is used.
|
||||
struct ShaderDiskCacheUsage {
|
||||
u64 unique_identifier{};
|
||||
BaseBindings bindings;
|
||||
GLenum primitive{};
|
||||
ProgramVariant variant;
|
||||
|
||||
bool operator==(const ShaderDiskCacheUsage& rhs) const {
|
||||
return std::tie(unique_identifier, bindings, primitive) ==
|
||||
std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
|
||||
return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
|
||||
}
|
||||
|
||||
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
|
||||
|
@ -76,7 +95,19 @@ namespace std {
|
|||
template <>
|
||||
struct hash<OpenGL::BaseBindings> {
|
||||
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
|
||||
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
|
||||
return static_cast<std::size_t>(bindings.cbuf) ^
|
||||
(static_cast<std::size_t>(bindings.gmem) << 8) ^
|
||||
(static_cast<std::size_t>(bindings.sampler) << 16) ^
|
||||
(static_cast<std::size_t>(bindings.image) << 24);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::ProgramVariant> {
|
||||
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
|
||||
return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
|
||||
std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
|
||||
(static_cast<std::size_t>(variant.primitive_mode) << 6);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -84,7 +115,7 @@ template <>
|
|||
struct hash<OpenGL::ShaderDiskCacheUsage> {
|
||||
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
|
||||
return static_cast<std::size_t>(usage.unique_identifier) ^
|
||||
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
|
||||
std::hash<OpenGL::ProgramVariant>()(usage.variant);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -275,26 +306,17 @@ private:
|
|||
return LoadArrayFromPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
bool LoadObjectFromPrecompiled(bool& object) {
|
||||
u8 value;
|
||||
const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
|
||||
if (!read_ok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
object = value != 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Core system
|
||||
Core::System& system;
|
||||
// Stored transferable shaders
|
||||
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
|
||||
// Stores whole precompiled cache which will be read from/saved to the precompiled cache file
|
||||
|
||||
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
|
||||
// file
|
||||
FileSys::VectorVfsFile precompiled_cache_virtual_file;
|
||||
// Stores the current offset of the precompiled cache file for IO purposes
|
||||
std::size_t precompiled_cache_virtual_file_offset = 0;
|
||||
|
||||
// Stored transferable shaders
|
||||
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
|
||||
|
||||
// The cache has been loaded at boot
|
||||
bool tried_to_load{};
|
||||
};
|
||||
|
|
|
@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
|
||||
bool use_persistent)
|
||||
: buffer_size(size) {
|
||||
gl_buffer.Create();
|
||||
|
||||
|
@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
|
|||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
if (GLAD_GL_ARB_buffer_storage) {
|
||||
if (use_persistent) {
|
||||
persistent = true;
|
||||
coherent = prefer_coherent;
|
||||
const GLbitfield flags =
|
||||
|
|
|
@ -13,7 +13,8 @@ namespace OpenGL {
|
|||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
|
||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
|
||||
bool use_persistent = true);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
|
|
614
src/video_core/renderer_opengl/gl_texture_cache.cpp
Normal file
614
src/video_core/renderer_opengl/gl_texture_cache.cpp
Normal file
|
@ -0,0 +1,614 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/morton.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/texture_cache/surface_base.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/textures/convert.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
using VideoCore::MortonSwizzleMode;
|
||||
|
||||
using VideoCore::Surface::ComponentType;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceCompression;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
|
||||
|
||||
namespace {
|
||||
|
||||
struct FormatTuple {
|
||||
GLint internal_format;
|
||||
GLenum format;
|
||||
GLenum type;
|
||||
ComponentType component_type;
|
||||
bool compressed;
|
||||
};
|
||||
|
||||
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
|
||||
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
|
||||
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
|
||||
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
|
||||
false}, // A2B10G10R10U
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
|
||||
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
|
||||
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
|
||||
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
|
||||
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
|
||||
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
|
||||
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
|
||||
false}, // R11FG11FB10F
|
||||
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXT1
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXT23
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXT45
|
||||
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
|
||||
{GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXN2UNORM
|
||||
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
|
||||
{GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // BC7U
|
||||
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
|
||||
true}, // BC6H_UF16
|
||||
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
|
||||
true}, // BC6H_SF16
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
|
||||
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
|
||||
{GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
|
||||
{GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
|
||||
{GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
|
||||
{GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
|
||||
{GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
|
||||
{GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
|
||||
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
|
||||
{GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
|
||||
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
|
||||
{GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
|
||||
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
|
||||
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
|
||||
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
|
||||
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
|
||||
false}, // RGBA8_SRGB
|
||||
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
|
||||
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
|
||||
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
|
||||
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
|
||||
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
|
||||
// Compressed sRGB formats
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXT1_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXT23_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // DXT45_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
||||
true}, // BC7U_SRGB
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
|
||||
|
||||
// Depth formats
|
||||
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
|
||||
false}, // Z16
|
||||
|
||||
// DepthStencil formats
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
|
||||
false}, // Z24S8
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
|
||||
false}, // S8Z24
|
||||
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
|
||||
ComponentType::Float, false}, // Z32FS8
|
||||
}};
|
||||
|
||||
const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
|
||||
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
|
||||
const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
|
||||
ASSERT(component_type == format.component_type);
|
||||
return format;
|
||||
}
|
||||
|
||||
GLenum GetTextureTarget(const SurfaceTarget& target) {
|
||||
switch (target) {
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
return GL_TEXTURE_BUFFER;
|
||||
case SurfaceTarget::Texture1D:
|
||||
return GL_TEXTURE_1D;
|
||||
case SurfaceTarget::Texture2D:
|
||||
return GL_TEXTURE_2D;
|
||||
case SurfaceTarget::Texture3D:
|
||||
return GL_TEXTURE_3D;
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
return GL_TEXTURE_1D_ARRAY;
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
return GL_TEXTURE_2D_ARRAY;
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
return GL_TEXTURE_CUBE_MAP;
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
return GL_TEXTURE_CUBE_MAP_ARRAY;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
GLint GetSwizzleSource(SwizzleSource source) {
|
||||
switch (source) {
|
||||
case SwizzleSource::Zero:
|
||||
return GL_ZERO;
|
||||
case SwizzleSource::R:
|
||||
return GL_RED;
|
||||
case SwizzleSource::G:
|
||||
return GL_GREEN;
|
||||
case SwizzleSource::B:
|
||||
return GL_BLUE;
|
||||
case SwizzleSource::A:
|
||||
return GL_ALPHA;
|
||||
case SwizzleSource::OneInt:
|
||||
case SwizzleSource::OneFloat:
|
||||
return GL_ONE;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return GL_NONE;
|
||||
}
|
||||
|
||||
void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
|
||||
glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
|
||||
if (params.num_levels == 1) {
|
||||
glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
|
||||
}
|
||||
}
|
||||
|
||||
OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
|
||||
OGLBuffer& texture_buffer) {
|
||||
OGLTexture texture;
|
||||
texture.Create(target);
|
||||
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
|
||||
break;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
texture_buffer.Create();
|
||||
glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
|
||||
nullptr, GL_DYNAMIC_STORAGE_BIT);
|
||||
glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
|
||||
case SurfaceTarget::Texture2D:
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
|
||||
params.height);
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
|
||||
params.height, params.depth);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
ApplyTextureDefaults(params, texture.handle);
|
||||
|
||||
return texture;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
|
||||
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
|
||||
const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
|
||||
internal_format = tuple.internal_format;
|
||||
format = tuple.format;
|
||||
type = tuple.type;
|
||||
is_compressed = tuple.compressed;
|
||||
target = GetTextureTarget(params.target);
|
||||
texture = CreateTexture(params, target, internal_format, texture_buffer);
|
||||
DecorateSurfaceName();
|
||||
main_view = CreateViewInner(
|
||||
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
|
||||
true);
|
||||
}
|
||||
|
||||
CachedSurface::~CachedSurface() = default;
|
||||
|
||||
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture_Download);
|
||||
|
||||
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
|
||||
|
||||
for (u32 level = 0; level < params.emulated_levels; ++level) {
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
|
||||
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
|
||||
if (is_compressed) {
|
||||
glGetCompressedTextureImage(texture.handle, level,
|
||||
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
|
||||
staging_buffer.data() + mip_offset);
|
||||
} else {
|
||||
glGetTextureImage(texture.handle, level, format, type,
|
||||
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
|
||||
staging_buffer.data() + mip_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
|
||||
SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
|
||||
for (u32 level = 0; level < params.emulated_levels; ++level) {
|
||||
UploadTextureMipmap(level, staging_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
|
||||
|
||||
auto compression_type = params.GetCompressionType();
|
||||
|
||||
const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
|
||||
? params.GetConvertedMipmapOffset(level)
|
||||
: params.GetHostMipmapLevelOffset(level);
|
||||
const u8* buffer{staging_buffer.data() + mip_offset};
|
||||
if (is_compressed) {
|
||||
const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture2D:
|
||||
glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
|
||||
static_cast<GLsizei>(params.GetMipWidth(level)),
|
||||
static_cast<GLsizei>(params.GetMipHeight(level)),
|
||||
internal_format, image_size, buffer);
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
|
||||
static_cast<GLsizei>(params.GetMipWidth(level)),
|
||||
static_cast<GLsizei>(params.GetMipHeight(level)),
|
||||
static_cast<GLsizei>(params.GetMipDepth(level)),
|
||||
internal_format, image_size, buffer);
|
||||
break;
|
||||
case SurfaceTarget::TextureCubemap: {
|
||||
const std::size_t layer_size{params.GetHostLayerSize(level)};
|
||||
for (std::size_t face = 0; face < params.depth; ++face) {
|
||||
glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
|
||||
static_cast<GLsizei>(params.GetMipWidth(level)),
|
||||
static_cast<GLsizei>(params.GetMipHeight(level)), 1,
|
||||
internal_format, static_cast<GLsizei>(layer_size),
|
||||
buffer);
|
||||
buffer += layer_size;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else {
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
|
||||
buffer);
|
||||
break;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
ASSERT(level == 0);
|
||||
glNamedBufferSubData(texture_buffer.handle, 0,
|
||||
params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
|
||||
break;
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
case SurfaceTarget::Texture2D:
|
||||
glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
|
||||
params.GetMipHeight(level), format, type, buffer);
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glTextureSubImage3D(
|
||||
texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
|
||||
static_cast<GLsizei>(params.GetMipHeight(level)),
|
||||
static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
|
||||
break;
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
for (std::size_t face = 0; face < params.depth; ++face) {
|
||||
glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
|
||||
params.GetMipWidth(level), params.GetMipHeight(level), 1,
|
||||
format, type, buffer);
|
||||
buffer += params.GetHostLayerSize(level);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CachedSurface::DecorateSurfaceName() {
|
||||
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
|
||||
}
|
||||
|
||||
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
|
||||
LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
|
||||
}
|
||||
|
||||
View CachedSurface::CreateView(const ViewParams& view_key) {
|
||||
return CreateViewInner(view_key, false);
|
||||
}
|
||||
|
||||
View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
|
||||
auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
|
||||
views[view_key] = view;
|
||||
if (!is_proxy)
|
||||
view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
|
||||
return view;
|
||||
}
|
||||
|
||||
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
|
||||
const bool is_proxy)
|
||||
: VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
|
||||
target = GetTextureTarget(params.target);
|
||||
if (!is_proxy) {
|
||||
texture_view = CreateTextureView();
|
||||
}
|
||||
swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
|
||||
}
|
||||
|
||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
|
||||
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
|
||||
ASSERT(params.num_layers == 1 && params.num_levels == 1);
|
||||
|
||||
const auto& owner_params = surface.GetSurfaceParams();
|
||||
|
||||
switch (owner_params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
|
||||
params.base_level);
|
||||
break;
|
||||
case SurfaceTarget::Texture2D:
|
||||
glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
|
||||
params.base_level);
|
||||
break;
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
|
||||
params.base_layer);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (new_swizzle == swizzle)
|
||||
return;
|
||||
swizzle = new_swizzle;
|
||||
const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
|
||||
GetSwizzleSource(z_source),
|
||||
GetSwizzleSource(w_source)};
|
||||
const GLuint handle = GetTexture();
|
||||
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
|
||||
}
|
||||
|
||||
OGLTextureView CachedSurfaceView::CreateTextureView() const {
|
||||
const auto& owner_params = surface.GetSurfaceParams();
|
||||
OGLTextureView texture_view;
|
||||
texture_view.Create();
|
||||
|
||||
const GLuint handle{texture_view.handle};
|
||||
const FormatTuple& tuple{
|
||||
GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};
|
||||
|
||||
glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
|
||||
params.num_levels, params.base_layer, params.num_layers);
|
||||
|
||||
ApplyTextureDefaults(owner_params, handle);
|
||||
|
||||
return texture_view;
|
||||
}
|
||||
|
||||
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
|
||||
VideoCore::RasterizerInterface& rasterizer,
|
||||
const Device& device)
|
||||
: TextureCacheBase{system, rasterizer} {
|
||||
src_framebuffer.Create();
|
||||
dst_framebuffer.Create();
|
||||
}
|
||||
|
||||
TextureCacheOpenGL::~TextureCacheOpenGL() = default;
|
||||
|
||||
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
|
||||
return std::make_shared<CachedSurface>(gpu_addr, params);
|
||||
}
|
||||
|
||||
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
|
||||
const VideoCommon::CopyParams& copy_params) {
|
||||
const auto& src_params = src_surface->GetSurfaceParams();
|
||||
const auto& dst_params = dst_surface->GetSurfaceParams();
|
||||
if (src_params.type != dst_params.type) {
|
||||
// A fallback is needed
|
||||
return;
|
||||
}
|
||||
const auto src_handle = src_surface->GetTexture();
|
||||
const auto src_target = src_surface->GetTarget();
|
||||
const auto dst_handle = dst_surface->GetTexture();
|
||||
const auto dst_target = dst_surface->GetTarget();
|
||||
glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
|
||||
copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
|
||||
copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
|
||||
copy_params.dest_z, copy_params.width, copy_params.height,
|
||||
copy_params.depth);
|
||||
}
|
||||
|
||||
void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
const auto& src_params{src_view->GetSurfaceParams()};
|
||||
const auto& dst_params{dst_view->GetSurfaceParams()};
|
||||
|
||||
OpenGLState prev_state{OpenGLState::GetCurState()};
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
OpenGLState state;
|
||||
state.draw.read_framebuffer = src_framebuffer.handle;
|
||||
state.draw.draw_framebuffer = dst_framebuffer.handle;
|
||||
state.Apply();
|
||||
|
||||
u32 buffers{};
|
||||
|
||||
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
|
||||
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
|
||||
|
||||
if (src_params.type == SurfaceType::ColorTexture) {
|
||||
src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
buffers = GL_COLOR_BUFFER_BIT;
|
||||
} else if (src_params.type == SurfaceType::Depth) {
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
|
||||
buffers = GL_DEPTH_BUFFER_BIT;
|
||||
} else if (src_params.type == SurfaceType::DepthStencil) {
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
|
||||
|
||||
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||
}
|
||||
|
||||
const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
|
||||
const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
|
||||
const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
|
||||
|
||||
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
|
||||
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
|
||||
is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
|
||||
}
|
||||
|
||||
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
|
||||
const auto& src_params = src_surface->GetSurfaceParams();
|
||||
const auto& dst_params = dst_surface->GetSurfaceParams();
|
||||
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
|
||||
|
||||
const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
|
||||
const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
|
||||
|
||||
const std::size_t source_size = src_surface->GetHostSizeInBytes();
|
||||
const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
|
||||
|
||||
const std::size_t buffer_size = std::max(source_size, dest_size);
|
||||
|
||||
GLuint copy_pbo_handle = FetchPBO(buffer_size);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
|
||||
|
||||
if (source_format.compressed) {
|
||||
glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
|
||||
nullptr);
|
||||
} else {
|
||||
glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
|
||||
static_cast<GLsizei>(source_size), nullptr);
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
|
||||
|
||||
const GLsizei width = static_cast<GLsizei>(dst_params.width);
|
||||
const GLsizei height = static_cast<GLsizei>(dst_params.height);
|
||||
const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
|
||||
if (dest_format.compressed) {
|
||||
LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
|
||||
UNREACHABLE();
|
||||
} else {
|
||||
switch (dst_params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
|
||||
dest_format.type, nullptr);
|
||||
break;
|
||||
case SurfaceTarget::Texture2D:
|
||||
glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
|
||||
dest_format.format, dest_format.type, nullptr);
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
|
||||
dest_format.format, dest_format.type, nullptr);
|
||||
break;
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
|
||||
dest_format.format, dest_format.type, nullptr);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
|
||||
static_cast<u32>(dst_params.target));
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
|
||||
glTextureBarrier();
|
||||
}
|
||||
|
||||
GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
|
||||
ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
|
||||
const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
|
||||
OGLBuffer& cp = copy_pbo_cache[l2];
|
||||
if (cp.handle == 0) {
|
||||
const std::size_t ceil_size = 1ULL << l2;
|
||||
cp.Create();
|
||||
cp.MakeStreamCopy(ceil_size);
|
||||
}
|
||||
return cp.handle;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
143
src/video_core/renderer_opengl/gl_texture_cache.h
Normal file
143
src/video_core/renderer_opengl/gl_texture_cache.h
Normal file
|
@ -0,0 +1,143 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using VideoCommon::SurfaceParams;
|
||||
using VideoCommon::ViewParams;
|
||||
|
||||
class CachedSurfaceView;
|
||||
class CachedSurface;
|
||||
class TextureCacheOpenGL;
|
||||
|
||||
using Surface = std::shared_ptr<CachedSurface>;
|
||||
using View = std::shared_ptr<CachedSurfaceView>;
|
||||
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
|
||||
|
||||
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
|
||||
friend CachedSurfaceView;
|
||||
|
||||
public:
|
||||
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
|
||||
~CachedSurface();
|
||||
|
||||
void UploadTexture(const std::vector<u8>& staging_buffer) override;
|
||||
void DownloadTexture(std::vector<u8>& staging_buffer) override;
|
||||
|
||||
GLenum GetTarget() const {
|
||||
return target;
|
||||
}
|
||||
|
||||
GLuint GetTexture() const {
|
||||
return texture.handle;
|
||||
}
|
||||
|
||||
protected:
|
||||
void DecorateSurfaceName();
|
||||
|
||||
View CreateView(const ViewParams& view_key) override;
|
||||
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
|
||||
|
||||
private:
|
||||
void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
|
||||
|
||||
GLenum internal_format{};
|
||||
GLenum format{};
|
||||
GLenum type{};
|
||||
bool is_compressed{};
|
||||
GLenum target{};
|
||||
u32 view_count{};
|
||||
|
||||
OGLTexture texture;
|
||||
OGLBuffer texture_buffer;
|
||||
};
|
||||
|
||||
class CachedSurfaceView final : public VideoCommon::ViewBase {
|
||||
public:
|
||||
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
|
||||
~CachedSurfaceView();
|
||||
|
||||
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
|
||||
void Attach(GLenum attachment, GLenum target) const;
|
||||
|
||||
GLuint GetTexture() const {
|
||||
if (is_proxy) {
|
||||
return surface.GetTexture();
|
||||
}
|
||||
return texture_view.handle;
|
||||
}
|
||||
|
||||
const SurfaceParams& GetSurfaceParams() const {
|
||||
return surface.GetSurfaceParams();
|
||||
}
|
||||
|
||||
void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
|
||||
void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
|
||||
|
||||
private:
|
||||
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source) const {
|
||||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
|
||||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
|
||||
}
|
||||
|
||||
OGLTextureView CreateTextureView() const;
|
||||
|
||||
CachedSurface& surface;
|
||||
GLenum target{};
|
||||
|
||||
OGLTextureView texture_view;
|
||||
u32 swizzle;
|
||||
bool is_proxy;
|
||||
};
|
||||
|
||||
class TextureCacheOpenGL final : public TextureCacheBase {
|
||||
public:
|
||||
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
const Device& device);
|
||||
~TextureCacheOpenGL();
|
||||
|
||||
protected:
|
||||
Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
|
||||
|
||||
void ImageCopy(Surface& src_surface, Surface& dst_surface,
|
||||
const VideoCommon::CopyParams& copy_params) override;
|
||||
|
||||
void ImageBlit(View& src_view, View& dst_view,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
|
||||
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
|
||||
|
||||
private:
|
||||
GLuint FetchPBO(std::size_t buffer_size);
|
||||
|
||||
OGLFramebuffer src_framebuffer;
|
||||
OGLFramebuffer dst_framebuffer;
|
||||
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -471,7 +471,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
|
|||
}
|
||||
}
|
||||
|
||||
/// Initialize the renderer
|
||||
bool RendererOpenGL::Init() {
|
||||
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
|
||||
|
||||
|
|
|
@ -5,8 +5,10 @@
|
|||
#include <string>
|
||||
#include <fmt/format.h>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
@ -63,4 +65,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie
|
|||
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -32,4 +32,4 @@ private:
|
|||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
|
||||
|
||||
} // namespace OpenGL
|
||||
} // namespace OpenGL
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue