video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage.The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
This commit is contained in:
parent
9106ac1e6b
commit
9764c13d6d
152 changed files with 10609 additions and 8351 deletions
|
@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
|
|||
|
||||
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, std::size_t stream_size_)
|
||||
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_,
|
||||
std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)},
|
||||
device{device_} {
|
||||
const Device& device_, OGLStreamBuffer& stream_buffer_,
|
||||
StateTracker& state_tracker)
|
||||
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
|
||||
if (!device.HasFastBufferSubData()) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ namespace OpenGL {
|
|||
class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
class StateTracker;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
|
@ -52,9 +53,10 @@ private:
|
|||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
public:
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, std::size_t stream_size_);
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, OGLStreamBuffer& stream_buffer,
|
||||
StateTracker& state_tracker);
|
||||
~OGLBufferCache();
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||
|
|
|
@ -5,9 +5,11 @@
|
|||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
|
|||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LimitUBOs = {
|
||||
constexpr std::array LIMIT_UBOS = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSSBOs = {
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SSBOS = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
|
||||
|
||||
constexpr std::array LimitImages = {
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SAMPLERS = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
|
||||
};
|
||||
constexpr std::array LIMIT_IMAGES = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
|
@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
|
|||
return extensions;
|
||||
}
|
||||
|
||||
bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
|
||||
return std::find(images.begin(), images.end(), extension) != images.end();
|
||||
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
|
||||
return std::ranges::find(extensions, extension) != extensions.end();
|
||||
}
|
||||
|
||||
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||
|
@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
|||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
std::ranges::transform(LIMIT_UBOS, max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
return max;
|
||||
}
|
||||
|
||||
|
@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
|||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
bindings[stage] = {
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages,
|
||||
LIMIT_SAMPLERS[stage])};
|
||||
}
|
||||
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
|
@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
|||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
|||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
|
||||
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
|
||||
}
|
||||
|
||||
// Compute doesn't care about any of this.
|
||||
|
@ -188,6 +193,11 @@ bool IsASTCSupported() {
|
|||
return true;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
|
||||
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device()
|
||||
|
@ -206,9 +216,8 @@ Device::Device()
|
|||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||
disable_fast_buffer_sub_data = true;
|
||||
}
|
||||
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
|
||||
|
@ -224,6 +233,7 @@ Device::Device()
|
|||
has_precise_bug = TestPreciseBug();
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
|
||||
has_debugging_tool_attached = IsDebugToolAttached(extensions);
|
||||
|
||||
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||
// uniform buffers as "push constants"
|
||||
|
|
|
@ -36,11 +36,11 @@ public:
|
|||
return GetBaseBindings(static_cast<std::size_t>(shader_type));
|
||||
}
|
||||
|
||||
std::size_t GetUniformBufferAlignment() const {
|
||||
size_t GetUniformBufferAlignment() const {
|
||||
return uniform_buffer_alignment;
|
||||
}
|
||||
|
||||
std::size_t GetShaderStorageBufferAlignment() const {
|
||||
size_t GetShaderStorageBufferAlignment() const {
|
||||
return shader_storage_alignment;
|
||||
}
|
||||
|
||||
|
@ -104,6 +104,10 @@ public:
|
|||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_debugging_tool_attached;
|
||||
}
|
||||
|
||||
bool UseAssemblyShaders() const {
|
||||
return use_assembly_shaders;
|
||||
}
|
||||
|
@ -118,8 +122,8 @@ private:
|
|||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
std::size_t shader_storage_alignment{};
|
||||
size_t uniform_buffer_alignment{};
|
||||
size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
u32 max_compute_shared_memory_size{};
|
||||
|
@ -135,6 +139,7 @@ private:
|
|||
bool has_precise_bug{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool has_debugging_tool_attached{};
|
||||
bool use_assembly_shaders{};
|
||||
bool use_asynchronous_shaders{};
|
||||
};
|
||||
|
|
|
@ -46,7 +46,7 @@ void GLInnerFence::Wait() {
|
|||
}
|
||||
|
||||
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_,
|
||||
Tegra::GPU& gpu_, TextureCache& texture_cache_,
|
||||
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
|
||||
|
||||
|
|
|
@ -33,12 +33,12 @@ private:
|
|||
|
||||
using Fence = std::shared_ptr<GLInnerFence>;
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
|
||||
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
|
||||
|
||||
class FenceManagerOpenGL final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
|
||||
|
||||
FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
|
||||
|
||||
GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
|
||||
const auto [entry, is_cache_miss] = cache.try_emplace(key);
|
||||
auto& framebuffer{entry->second};
|
||||
if (is_cache_miss) {
|
||||
framebuffer = CreateFramebuffer(key);
|
||||
}
|
||||
return framebuffer.handle;
|
||||
}
|
||||
|
||||
OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
|
||||
OGLFramebuffer framebuffer;
|
||||
framebuffer.Create();
|
||||
|
||||
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
|
||||
|
||||
if (key.zeta) {
|
||||
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
|
||||
const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
|
||||
key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
|
||||
}
|
||||
|
||||
std::size_t num_buffers = 0;
|
||||
std::array<GLenum, Maxwell::NumRenderTargets> targets;
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (!key.colors[index]) {
|
||||
targets[index] = GL_NONE;
|
||||
continue;
|
||||
}
|
||||
const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
|
||||
key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
|
||||
|
||||
const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
|
||||
targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
|
||||
num_buffers = index + 1;
|
||||
}
|
||||
|
||||
if (num_buffers > 0) {
|
||||
glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
|
||||
} else {
|
||||
glDrawBuffer(GL_NONE);
|
||||
}
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
std::size_t FramebufferCacheKey::Hash() const noexcept {
|
||||
std::size_t hash = std::hash<View>{}(zeta);
|
||||
for (const auto& color : colors) {
|
||||
hash ^= std::hash<View>{}(color);
|
||||
}
|
||||
hash ^= static_cast<std::size_t>(color_attachments) << 16;
|
||||
return hash;
|
||||
}
|
||||
|
||||
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return std::tie(colors, zeta, color_attachments) ==
|
||||
std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,68 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
constexpr std::size_t BitsPerAttachment = 4;
|
||||
|
||||
struct FramebufferCacheKey {
|
||||
View zeta;
|
||||
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
|
||||
u32 color_attachments = 0;
|
||||
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const FramebufferCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
void SetAttachment(std::size_t index, u32 attachment) {
|
||||
color_attachments |= attachment << (BitsPerAttachment * index);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::FramebufferCacheKey> {
|
||||
std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class FramebufferCacheOpenGL {
|
||||
public:
|
||||
FramebufferCacheOpenGL();
|
||||
~FramebufferCacheOpenGL();
|
||||
|
||||
GLuint GetFramebuffer(const FramebufferCacheKey& key);
|
||||
|
||||
private:
|
||||
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
|
||||
|
||||
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -25,12 +25,15 @@
|
|||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
|
|||
|
||||
namespace {
|
||||
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
|
||||
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
constexpr size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||
|
||||
constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||
|
||||
constexpr size_t MAX_TEXTURES = 192;
|
||||
constexpr size_t MAX_IMAGES = 48;
|
||||
|
||||
struct TextureHandle {
|
||||
constexpr TextureHandle(u32 data, bool via_header_index) {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
template <typename Engine, typename Entry>
|
||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
ShaderType shader_type, std::size_t index = 0) {
|
||||
TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
|
||||
ShaderType shader_type, size_t index = 0) {
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
|
@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
|
|||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
return TextureHandle(handle_1 | handle_2, via_header_index);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(handle);
|
||||
}
|
||||
|
||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||
return engine.GetStageTexture(shader_type, offset);
|
||||
} else {
|
||||
return engine.GetTexture(offset);
|
||||
const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return TextureHandle(raw, via_header_index);
|
||||
}
|
||||
const u32 buffer = engine.GetBoundBuffer();
|
||||
const u64 offset = (entry.offset + index) * sizeof(u32);
|
||||
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
|
||||
}
|
||||
|
||||
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
|
@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
|||
if (!entry.IsIndirect()) {
|
||||
return entry.GetSize();
|
||||
}
|
||||
|
||||
if (buffer.size > Maxwell::MaxConstBufferSize) {
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
|
||||
Maxwell::MaxConstBufferSize);
|
||||
|
@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
|
|||
reinterpret_cast<const GLuint*>(ssbos));
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
|
||||
if (entry.is_buffer) {
|
||||
return ImageViewType::Buffer;
|
||||
}
|
||||
switch (entry.type) {
|
||||
case Tegra::Shader::TextureType::Texture1D:
|
||||
return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
|
||||
case Tegra::Shader::TextureType::Texture2D:
|
||||
return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
|
||||
case Tegra::Shader::TextureType::Texture3D:
|
||||
return ImageViewType::e3D;
|
||||
case Tegra::Shader::TextureType::TextureCube:
|
||||
return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return ImageViewType::e2D;
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
|
||||
switch (entry.type) {
|
||||
case Tegra::Shader::ImageType::Texture1D:
|
||||
return ImageViewType::e1D;
|
||||
case Tegra::Shader::ImageType::Texture1DArray:
|
||||
return ImageViewType::e1DArray;
|
||||
case Tegra::Shader::ImageType::Texture2D:
|
||||
return ImageViewType::e2D;
|
||||
case Tegra::Shader::ImageType::Texture2DArray:
|
||||
return ImageViewType::e2DArray;
|
||||
case Tegra::Shader::ImageType::Texture3D:
|
||||
return ImageViewType::e3D;
|
||||
case Tegra::Shader::ImageType::TextureBuffer:
|
||||
return ImageViewType::Buffer;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return ImageViewType::e2D;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_,
|
||||
ScreenInfo& screen_info_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_)
|
||||
: RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
|
||||
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
|
||||
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
|
||||
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
|
||||
texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
|
||||
stream_buffer(device, state_tracker),
|
||||
texture_cache_runtime(device, program_manager, state_tracker),
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
|
||||
query_cache(*this, maxwell3d, gpu_memory),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||
async_shaders(emu_window_) {
|
||||
CheckExtensions();
|
||||
|
||||
unified_uniform_buffer.Create();
|
||||
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||
|
||||
|
@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
async_shaders.AllocateWorkers();
|
||||
}
|
||||
|
@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::CheckExtensions() {
|
||||
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
|
||||
LOG_WARNING(
|
||||
Render_OpenGL,
|
||||
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexFormat() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexFormats]) {
|
||||
|
@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
|||
return info.offset;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
void RasterizerOpenGL::SetupShaders() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||
u32 clip_distances = 0;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto& shader_config = maxwell3d.regs.shader_config[index];
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
|
@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Currently this stages are not supported in the OpenGL backend.
|
||||
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
||||
if (program == Maxwell::ShaderProgram::TesselationControl ||
|
||||
|
@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
}
|
||||
|
||||
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||
|
||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
|
@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
break;
|
||||
}
|
||||
|
||||
// Stage indices are 0 - 5
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||
const size_t stage = index == 0 ? 0 : index - 1;
|
||||
shaders[stage] = shader;
|
||||
|
||||
SetupDrawConstBuffers(stage, shader);
|
||||
SetupDrawGlobalMemory(stage, shader);
|
||||
SetupDrawTextures(stage, shader);
|
||||
SetupDrawImages(stage, shader);
|
||||
SetupDrawTextures(shader, stage);
|
||||
SetupDrawImages(shader, stage);
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
|
@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
SyncClipEnabled(clip_distances);
|
||||
maxwell3d.dirty.flags[Dirty::Shaders] = false;
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader* const shader = shaders[stage];
|
||||
if (shader) {
|
||||
const auto base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
|
@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
|
|||
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureFramebuffers() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
|
||||
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
|
||||
return;
|
||||
}
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
|
||||
View depth_surface = texture_cache.GetDepthBufferSurface(true);
|
||||
|
||||
const auto& regs = maxwell3d.regs;
|
||||
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
|
||||
|
||||
// Bind the framebuffer surfaces
|
||||
FramebufferCacheKey key;
|
||||
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
|
||||
for (std::size_t index = 0; index < colors_count; ++index) {
|
||||
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
|
||||
if (!color_surface) {
|
||||
continue;
|
||||
}
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even
|
||||
// if the shader doesn't actually write to it.
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
|
||||
key.SetAttachment(index, regs.rt_control.GetMap(index));
|
||||
key.colors[index] = std::move(color_surface);
|
||||
}
|
||||
|
||||
if (depth_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even if
|
||||
// the shader doesn't actually write to it.
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
key.zeta = std::move(depth_surface);
|
||||
}
|
||||
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
View color_surface;
|
||||
|
||||
if (using_color) {
|
||||
// Determine if we have to preserve the contents.
|
||||
// First we have to make sure all clear masks are enabled.
|
||||
bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
|
||||
!regs.clear_buffers.B || !regs.clear_buffers.A;
|
||||
const std::size_t index = regs.clear_buffers.RT;
|
||||
if (regs.clear_flags.scissor) {
|
||||
// Then we have to confirm scissor testing clears the whole image.
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
preserve_contents |= scissor.min_x > 0;
|
||||
preserve_contents |= scissor.min_y > 0;
|
||||
preserve_contents |= scissor.max_x < regs.rt[index].width;
|
||||
preserve_contents |= scissor.max_y < regs.rt[index].height;
|
||||
}
|
||||
|
||||
color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
}
|
||||
|
||||
View depth_surface;
|
||||
if (using_depth_stencil) {
|
||||
bool preserve_contents = false;
|
||||
if (regs.clear_flags.scissor) {
|
||||
// For depth stencil clears we only have to confirm scissor test covers the whole image.
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
preserve_contents |= scissor.min_x > 0;
|
||||
preserve_contents |= scissor.min_y > 0;
|
||||
preserve_contents |= scissor.max_x < regs.zeta_width;
|
||||
preserve_contents |= scissor.max_y < regs.zeta_height;
|
||||
}
|
||||
|
||||
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
}
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
FramebufferCacheKey key;
|
||||
key.colors[0] = std::move(color_surface);
|
||||
key.zeta = std::move(depth_surface);
|
||||
|
||||
state_tracker.NotifyFramebuffer();
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
if (!maxwell3d.ShouldExecute()) {
|
||||
return;
|
||||
|
@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
|
|||
regs.clear_buffers.A) {
|
||||
use_color = true;
|
||||
|
||||
state_tracker.NotifyColorMask0();
|
||||
glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
|
||||
const GLuint index = regs.clear_buffers.RT;
|
||||
state_tracker.NotifyColorMask(index);
|
||||
glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
|
||||
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
|
||||
|
||||
// TODO(Rodrigo): Determine if clamping is used on clears
|
||||
|
@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
|
|||
state_tracker.NotifyScissor0();
|
||||
glDisablei(GL_SCISSOR_TEST, 0);
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
||||
|
||||
ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
}
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
}
|
||||
if (use_depth && use_stencil) {
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
|
||||
} else if (use_depth) {
|
||||
|
@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
// Prepare the vertex array.
|
||||
const bool invalidated = buffer_cache.Map(buffer_size);
|
||||
|
||||
if (invalidated) {
|
||||
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
dirty[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
dirty[index] = true;
|
||||
}
|
||||
}
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
// Prepare vertex array format.
|
||||
SetupVertexFormat();
|
||||
|
@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
}
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
texture_cache.GuardSamplers(true);
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||
SetupShaders(primitive_mode);
|
||||
texture_cache.GuardSamplers(false);
|
||||
|
||||
ConfigureFramebuffers();
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
SetupShaders();
|
||||
|
||||
// Signal the buffer cache that we are not going to upload more things.
|
||||
buffer_cache.Unmap();
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindGraphicsPipeline();
|
||||
|
||||
if (texture_cache.TextureBarrier()) {
|
||||
glTextureBarrier();
|
||||
}
|
||||
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||
BeginTransformFeedback(primitive_mode);
|
||||
|
||||
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
|
||||
|
@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
|
||||
SetupComputeTextures(kernel);
|
||||
SetupComputeImages(kernel);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
BindComputeTextures(kernel);
|
||||
|
||||
const std::size_t buffer_size =
|
||||
Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
SetupComputeConstBuffers(kernel);
|
||||
|
@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
buffer_cache.Unmap();
|
||||
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
|||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.FlushRegion(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
|||
if (!Settings::IsGPULevelHigh()) {
|
||||
return buffer_cache.MustFlushRegion(addr, size);
|
||||
}
|
||||
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
|
||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||
buffer_cache.MustFlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
|
@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
|||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.InvalidateRegion(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
|
@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
|||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGuestHost() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
texture_cache.SyncGuestHost();
|
||||
buffer_cache.SyncGuestHost();
|
||||
shader_cache.SyncGuestHost();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.UnmapMemory(addr, size);
|
||||
}
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
if (!gpu.IsAsync()) {
|
||||
gpu_memory.Write<u32>(addr, value);
|
||||
|
@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
|
|||
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::TiledCacheBarrier() {
|
||||
glTextureBarrier();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushCommands() {
|
||||
// Only flush when we have commands queued to OpenGL.
|
||||
if (num_queued_commands == 0) {
|
||||
|
@ -854,47 +833,97 @@ void RasterizerOpenGL::TickFrame() {
|
|||
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
|
||||
num_queued_commands = 0;
|
||||
|
||||
fence_manager.TickFrame();
|
||||
buffer_cache.TickFrame();
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.TickFrame();
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
texture_cache.DoFermiCopy(src, dst, copy_config);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.BlitImage(dst, src, copy_config);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
VAddr framebuffer_addr, u32 pixel_stride) {
|
||||
if (!framebuffer_addr) {
|
||||
return {};
|
||||
if (framebuffer_addr == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
|
||||
if (!surface) {
|
||||
return {};
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
|
||||
if (!image_view) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify that the cached surface is the same size and format as the requested framebuffer
|
||||
const auto& params{surface->GetSurfaceParams()};
|
||||
const auto& pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
|
||||
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
|
||||
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
|
||||
|
||||
if (params.pixel_format != pixel_format) {
|
||||
LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
|
||||
}
|
||||
|
||||
screen_info.display_texture = surface->GetTexture();
|
||||
screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
|
||||
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
|
||||
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
|
||||
|
||||
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
|
||||
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
SetupComputeTextures(kernel);
|
||||
SetupComputeImages(kernel);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
|
||||
GLuint base_image, size_t& image_view_index,
|
||||
size_t& texture_index, size_t& image_index) {
|
||||
const GLuint* const samplers = sampler_handles.data() + texture_index;
|
||||
const GLuint* const textures = texture_handles.data() + texture_index;
|
||||
const GLuint* const images = image_handles.data() + image_index;
|
||||
|
||||
const size_t num_samplers = entries.samplers.size();
|
||||
for (const auto& sampler : entries.samplers) {
|
||||
for (size_t i = 0; i < sampler.size; ++i) {
|
||||
const ImageViewId image_view_id = image_view_ids[image_view_index++];
|
||||
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
|
||||
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
|
||||
texture_handles[texture_index++] = handle;
|
||||
}
|
||||
}
|
||||
const size_t num_images = entries.images.size();
|
||||
for (size_t unit = 0; unit < num_images; ++unit) {
|
||||
// TODO: Mark as modified
|
||||
const ImageViewId image_view_id = image_view_ids[image_view_index++];
|
||||
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
|
||||
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
|
||||
image_handles[image_index] = handle;
|
||||
++image_index;
|
||||
}
|
||||
if (num_samplers > 0) {
|
||||
glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
|
||||
glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
|
||||
}
|
||||
if (num_images > 0) {
|
||||
glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
|||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||
|
||||
|
@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : shader->GetEntries().samplers) {
|
||||
const auto shader_type = static_cast<ShaderType>(stage_index);
|
||||
for (std::size_t i = 0; i < entry.size; ++i) {
|
||||
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
|
||||
SetupTexture(binding++, texture, entry);
|
||||
for (size_t index = 0; index < entry.size; ++index) {
|
||||
const auto handle =
|
||||
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
|
||||
const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
u32 binding = 0;
|
||||
void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : kernel->GetEntries().samplers) {
|
||||
for (std::size_t i = 0; i < entry.size; ++i) {
|
||||
const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
|
||||
SetupTexture(binding++, texture, entry);
|
||||
for (size_t i = 0; i < entry.size; ++i) {
|
||||
const auto handle =
|
||||
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
|
||||
const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||
const SamplerEntry& entry) {
|
||||
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
|
||||
if (!view) {
|
||||
// Can occur when texture addr is null or its memory is unmapped/invalid
|
||||
glBindSampler(binding, 0);
|
||||
glBindTextureUnit(binding, 0);
|
||||
return;
|
||||
}
|
||||
const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
glBindTextureUnit(binding, handle);
|
||||
if (!view->GetSurfaceParams().IsBuffer()) {
|
||||
glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
|
||||
u32 binding = device.GetBaseBindings(stage_index).image;
|
||||
void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
const auto shader_type = static_cast<ShaderType>(stage_index);
|
||||
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
|
||||
SetupImage(binding++, tic, entry);
|
||||
const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
|
||||
u32 binding = 0;
|
||||
void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
|
||||
SetupImage(binding++, tic, entry);
|
||||
const auto handle =
|
||||
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
|
||||
const ImageEntry& entry) {
|
||||
const auto view = texture_cache.GetImageSurface(tic, entry);
|
||||
if (!view) {
|
||||
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
|
||||
return;
|
||||
}
|
||||
if (entry.is_written) {
|
||||
view->MarkAsModified(texture_cache.Tick());
|
||||
}
|
||||
const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
|
|||
flags[Dirty::PointSize] = false;
|
||||
|
||||
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
|
||||
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
|
||||
|
||||
if (maxwell3d.regs.vp_point_size.enable) {
|
||||
// By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
|
||||
glEnable(GL_PROGRAM_POINT_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
|
||||
// in OpenGL).
|
||||
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
|
||||
glDisable(GL_PROGRAM_POINT_SIZE);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLineState() {
|
||||
|
|
|
@ -7,12 +7,13 @@
|
|||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
@ -23,16 +24,14 @@
|
|||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
|
@ -51,7 +50,7 @@ class MemoryManager;
|
|||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
struct DrawParameters;
|
||||
struct ShaderEntries;
|
||||
|
||||
struct BindlessSSBO {
|
||||
GLuint64EXT address;
|
||||
|
@ -79,15 +78,18 @@ public:
|
|||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void SyncGuestHost() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
|
@ -108,11 +110,14 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
/// Configures the color and depth framebuffer states.
|
||||
void ConfigureFramebuffers();
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
/// Configures the color and depth framebuffer for clearing.
|
||||
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
|
||||
void BindComputeTextures(Shader* kernel);
|
||||
|
||||
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
|
||||
size_t& image_view_index, size_t& texture_index, size_t& image_index);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
@ -136,23 +141,16 @@ private:
|
|||
size_t size, BindlessSSBO* ssbo);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||
void SetupDrawTextures(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(Shader* kernel);
|
||||
|
||||
/// Configures a texture.
|
||||
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||
const SamplerEntry& entry);
|
||||
void SetupComputeTextures(const Shader* kernel);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(std::size_t stage_index, Shader* shader);
|
||||
void SetupDrawImages(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(Shader* shader);
|
||||
|
||||
/// Configures an image.
|
||||
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
void SetupComputeImages(const Shader* shader);
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport();
|
||||
|
@ -227,9 +225,6 @@ private:
|
|||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
/// Check for extension that are not strictly required but are needed for correct emulation
|
||||
void CheckExtensions();
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
|
@ -242,7 +237,7 @@ private:
|
|||
|
||||
GLintptr SetupIndexBuffer();
|
||||
|
||||
void SetupShaders(GLenum primitive_mode);
|
||||
void SetupShaders();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
|
@ -254,19 +249,21 @@ private:
|
|||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
TextureCacheOpenGL texture_cache;
|
||||
OGLStreamBuffer stream_buffer;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
SamplerCacheOpenGL sampler_cache;
|
||||
FramebufferCacheOpenGL framebuffer_cache;
|
||||
QueryCache query_cache;
|
||||
OGLBufferCache buffer_cache;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
|
||||
GLint vertex_binding = 0;
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles;
|
||||
std::array<GLuint, MAX_IMAGES> image_handles;
|
||||
|
||||
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
transform_feedback_buffers;
|
||||
|
@ -280,7 +277,7 @@ private:
|
|||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||
/// Number of commands queued to the OpenGL driver. Resetted on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
||||
u32 last_clip_distance_mask = 0;
|
||||
|
|
|
@ -71,7 +71,7 @@ void OGLSampler::Create() {
|
|||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
glGenSamplers(1, &handle);
|
||||
glCreateSamplers(1, &handle);
|
||||
}
|
||||
|
||||
void OGLSampler::Release() {
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
|
||||
|
||||
SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
|
||||
|
||||
OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
|
||||
OGLSampler sampler;
|
||||
sampler.Create();
|
||||
|
||||
const GLuint sampler_id{sampler.handle};
|
||||
glSamplerParameteri(
|
||||
sampler_id, GL_TEXTURE_MAG_FILTER,
|
||||
MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
|
||||
MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
|
||||
tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
|
||||
MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
|
||||
glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
|
||||
if (GLAD_GL_ARB_texture_filter_anisotropic) {
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
|
||||
} else if (GLAD_GL_EXT_texture_filter_anisotropic) {
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
|
||||
}
|
||||
|
||||
return sampler;
|
||||
}
|
||||
|
||||
GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
|
||||
return sampler.handle;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,25 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/sampler_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
|
||||
public:
|
||||
explicit SamplerCacheOpenGL();
|
||||
~SamplerCacheOpenGL();
|
||||
|
||||
protected:
|
||||
OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
|
||||
|
||||
GLuint ToSamplerType(const OGLSampler& sampler) const override;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -27,7 +27,6 @@
|
|||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
|
|
@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
|
|||
using Tegra::Shader::PixelImap;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureType;
|
||||
using VideoCommon::Shader::BuildTransformFeedback;
|
||||
using VideoCommon::Shader::Registry;
|
||||
|
||||
using namespace std::string_literals;
|
||||
using namespace VideoCommon::Shader;
|
||||
using namespace std::string_literals;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using Operation = const OperationNode&;
|
||||
|
@ -2753,11 +2751,11 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
std::string GetSampler(const Sampler& sampler) const {
|
||||
std::string GetSampler(const SamplerEntry& sampler) const {
|
||||
return AppendSuffix(sampler.index, "sampler");
|
||||
}
|
||||
|
||||
std::string GetImage(const Image& image) const {
|
||||
std::string GetImage(const ImageEntry& image) const {
|
||||
return AppendSuffix(image.index, "image");
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@ namespace OpenGL {
|
|||
class Device;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using ImageEntry = VideoCommon::Shader::ImageEntry;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
|
|
|
@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
|
|||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindHostCompute(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
glDisable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
glUseProgram(program);
|
||||
is_graphics_bound = false;
|
||||
}
|
||||
|
||||
void ProgramManager::RestoreGuestCompute() {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::UseVertexShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
|
||||
|
|
|
@ -45,6 +45,12 @@ public:
|
|||
/// Rewinds BindHostPipeline state changes.
|
||||
void RestoreGuestPipeline();
|
||||
|
||||
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
|
||||
void BindHostCompute(GLuint program);
|
||||
|
||||
/// Rewinds BindHostCompute state changes.
|
||||
void RestoreGuestCompute();
|
||||
|
||||
void UseVertexShader(GLuint program);
|
||||
void UseGeometryShader(GLuint program);
|
||||
void UseFragmentShader(GLuint program);
|
||||
|
|
|
@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
|
|||
}
|
||||
}
|
||||
|
||||
void StateTracker::InvalidateStreamBuffer() {
|
||||
flags[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -92,6 +92,8 @@ class StateTracker {
|
|||
public:
|
||||
explicit StateTracker(Tegra::GPU& gpu);
|
||||
|
||||
void InvalidateStreamBuffer();
|
||||
|
||||
void BindIndexBuffer(GLuint new_index_buffer) {
|
||||
if (index_buffer == new_index_buffer) {
|
||||
return;
|
||||
|
@ -100,6 +102,14 @@ public:
|
|||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
|
||||
}
|
||||
|
||||
void BindFramebuffer(GLuint new_framebuffer) {
|
||||
if (framebuffer == new_framebuffer) {
|
||||
return;
|
||||
}
|
||||
framebuffer = new_framebuffer;
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
|
||||
}
|
||||
|
||||
void NotifyScreenDrawVertexArray() {
|
||||
flags[OpenGL::Dirty::VertexFormats] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
|
||||
|
@ -129,9 +139,9 @@ public:
|
|||
flags[OpenGL::Dirty::Scissor0] = true;
|
||||
}
|
||||
|
||||
void NotifyColorMask0() {
|
||||
void NotifyColorMask(size_t index) {
|
||||
flags[OpenGL::Dirty::ColorMasks] = true;
|
||||
flags[OpenGL::Dirty::ColorMask0] = true;
|
||||
flags[OpenGL::Dirty::ColorMask0 + index] = true;
|
||||
}
|
||||
|
||||
void NotifyBlend0() {
|
||||
|
@ -190,6 +200,7 @@ public:
|
|||
private:
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
|
||||
|
||||
GLuint framebuffer = 0;
|
||||
GLuint index_buffer = 0;
|
||||
};
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
|
@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
|
||||
: buffer_size(size) {
|
||||
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
|
||||
: state_tracker{state_tracker_} {
|
||||
gl_buffer.Create();
|
||||
|
||||
GLsizeiptr allocate_size = size;
|
||||
if (vertex_data_usage) {
|
||||
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
|
||||
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
|
||||
// crash.
|
||||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
||||
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
|
||||
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||
|
@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
|
|||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= buffer_size);
|
||||
ASSERT(alignment <= buffer_size);
|
||||
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= BUFFER_SIZE);
|
||||
ASSERT(alignment <= BUFFER_SIZE);
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
}
|
||||
|
||||
bool invalidate = false;
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
if (buffer_pos + size > BUFFER_SIZE) {
|
||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||
glInvalidateBufferData(gl_buffer.handle);
|
||||
state_tracker.InvalidateStreamBuffer();
|
||||
|
||||
buffer_pos = 0;
|
||||
invalidate = true;
|
||||
}
|
||||
|
||||
return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
|
||||
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
|
||||
}
|
||||
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
|
|
|
@ -4,29 +4,31 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
|
||||
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, the offset within the buffer,
|
||||
* and the invalidation flag for previous chunks.
|
||||
* The return values are the pointer to the new chunk, and the offset within the buffer.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
|
@ -39,15 +41,18 @@ public:
|
|||
}
|
||||
|
||||
GLsizeiptr Size() const noexcept {
|
||||
return buffer_size;
|
||||
return BUFFER_SIZE;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
GLuint64EXT gpu_address = 0;
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr buffer_size = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -4,157 +4,247 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using VideoCommon::SurfaceParams;
|
||||
using VideoCommon::ViewParams;
|
||||
|
||||
class CachedSurfaceView;
|
||||
class CachedSurface;
|
||||
class TextureCacheOpenGL;
|
||||
class Device;
|
||||
class ProgramManager;
|
||||
class StateTracker;
|
||||
|
||||
using Surface = std::shared_ptr<CachedSurface>;
|
||||
using View = std::shared_ptr<CachedSurfaceView>;
|
||||
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
|
||||
class Framebuffer;
|
||||
class Image;
|
||||
class ImageView;
|
||||
class Sampler;
|
||||
|
||||
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
|
||||
friend CachedSurfaceView;
|
||||
using VideoCommon::ImageId;
|
||||
using VideoCommon::ImageViewId;
|
||||
using VideoCommon::ImageViewType;
|
||||
using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Offset2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
|
||||
class ImageBufferMap {
|
||||
public:
|
||||
explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_,
|
||||
bool is_astc_supported_);
|
||||
~CachedSurface();
|
||||
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
|
||||
~ImageBufferMap();
|
||||
|
||||
void UploadTexture(const std::vector<u8>& staging_buffer) override;
|
||||
void DownloadTexture(std::vector<u8>& staging_buffer) override;
|
||||
|
||||
GLenum GetTarget() const {
|
||||
return target;
|
||||
GLuint Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
GLuint GetTexture() const {
|
||||
std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
private:
|
||||
std::span<u8> span;
|
||||
OGLSync* sync;
|
||||
GLuint handle;
|
||||
};
|
||||
|
||||
struct FormatProperties {
|
||||
GLenum compatibility_class;
|
||||
bool compatibility_by_size;
|
||||
bool is_compressed;
|
||||
};
|
||||
|
||||
class TextureCacheRuntime {
|
||||
friend Framebuffer;
|
||||
friend Image;
|
||||
friend ImageView;
|
||||
friend Sampler;
|
||||
|
||||
public:
|
||||
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
|
||||
StateTracker& state_tracker);
|
||||
~TextureCacheRuntime();
|
||||
|
||||
void Finish();
|
||||
|
||||
ImageBufferMap MapUploadBuffer(size_t size);
|
||||
|
||||
ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
bool CanImageBeCopied(const Image& dst, const Image& src);
|
||||
|
||||
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void InsertUploadMemoryBarrier();
|
||||
|
||||
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
|
||||
|
||||
private:
|
||||
struct StagingBuffers {
|
||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||
~StagingBuffers();
|
||||
|
||||
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
|
||||
|
||||
size_t RequestBuffer(size_t requested_size);
|
||||
|
||||
std::optional<size_t> FindBuffer(size_t requested_size);
|
||||
|
||||
std::vector<OGLSync> syncs;
|
||||
std::vector<OGLBuffer> buffers;
|
||||
std::vector<u8*> maps;
|
||||
std::vector<size_t> sizes;
|
||||
GLenum storage_flags;
|
||||
GLenum map_flags;
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
StateTracker& state_tracker;
|
||||
UtilShaders util_shaders;
|
||||
|
||||
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
|
||||
|
||||
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
||||
StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
|
||||
|
||||
OGLTexture null_image_1d_array;
|
||||
OGLTexture null_image_cube_array;
|
||||
OGLTexture null_image_3d;
|
||||
OGLTexture null_image_rect;
|
||||
OGLTextureView null_image_view_1d;
|
||||
OGLTextureView null_image_view_2d;
|
||||
OGLTextureView null_image_view_2d_array;
|
||||
OGLTextureView null_image_view_cube;
|
||||
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
friend ImageView;
|
||||
|
||||
public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return texture.handle;
|
||||
}
|
||||
|
||||
bool IsCompressed() const {
|
||||
return is_compressed;
|
||||
}
|
||||
|
||||
protected:
|
||||
void DecorateSurfaceName() override;
|
||||
|
||||
View CreateView(const ViewParams& view_key) override;
|
||||
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
|
||||
|
||||
private:
|
||||
void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
|
||||
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
GLenum internal_format{};
|
||||
GLenum format{};
|
||||
GLenum type{};
|
||||
bool is_compressed{};
|
||||
GLenum target{};
|
||||
u32 view_count{};
|
||||
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
OGLTexture texture;
|
||||
OGLBuffer texture_buffer;
|
||||
OGLTextureView store_view;
|
||||
OGLBuffer buffer;
|
||||
GLenum gl_internal_format = GL_NONE;
|
||||
GLenum gl_store_format = GL_NONE;
|
||||
GLenum gl_format = GL_NONE;
|
||||
GLenum gl_type = GL_NONE;
|
||||
};
|
||||
|
||||
class CachedSurfaceView final : public VideoCommon::ViewBase {
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
friend Image;
|
||||
|
||||
public:
|
||||
explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_);
|
||||
~CachedSurfaceView();
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
|
||||
|
||||
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
|
||||
/// @param attachment Attachment to bind textures to
|
||||
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
|
||||
void Attach(GLenum attachment, GLenum fb_target) const;
|
||||
|
||||
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
|
||||
void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
|
||||
|
||||
void MarkAsModified(u64 tick) {
|
||||
surface.MarkAsModified(true, tick);
|
||||
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
|
||||
return views[static_cast<size_t>(query_type)];
|
||||
}
|
||||
|
||||
GLuint GetTexture() const {
|
||||
if (is_proxy) {
|
||||
return surface.GetTexture();
|
||||
}
|
||||
return main_view.handle;
|
||||
[[nodiscard]] GLuint DefaultHandle() const noexcept {
|
||||
return default_handle;
|
||||
}
|
||||
|
||||
GLenum GetFormat() const {
|
||||
return format;
|
||||
}
|
||||
|
||||
const SurfaceParams& GetSurfaceParams() const {
|
||||
return surface.GetSurfaceParams();
|
||||
[[nodiscard]] GLenum Format() const noexcept {
|
||||
return internal_format;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLTextureView CreateTextureView() const;
|
||||
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
|
||||
const VideoCommon::ImageViewInfo& info,
|
||||
VideoCommon::SubresourceRange view_range);
|
||||
|
||||
CachedSurface& surface;
|
||||
const GLenum format;
|
||||
const GLenum target;
|
||||
const bool is_proxy;
|
||||
|
||||
std::unordered_map<u32, OGLTextureView> view_cache;
|
||||
OGLTextureView main_view;
|
||||
|
||||
// Use an invalid default so it always fails the comparison test
|
||||
u32 current_swizzle = 0xffffffff;
|
||||
GLuint current_view = 0;
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
|
||||
std::vector<OGLTextureView> stored_views;
|
||||
GLuint default_handle = 0;
|
||||
GLenum internal_format = GL_NONE;
|
||||
};
|
||||
|
||||
class TextureCacheOpenGL final : public TextureCacheBase {
|
||||
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
|
||||
class Sampler {
|
||||
public:
|
||||
explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
StateTracker& state_tracker);
|
||||
~TextureCacheOpenGL();
|
||||
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
|
||||
|
||||
protected:
|
||||
Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
|
||||
|
||||
void ImageCopy(Surface& src_surface, Surface& dst_surface,
|
||||
const VideoCommon::CopyParams& copy_params) override;
|
||||
|
||||
void ImageBlit(View& src_view, View& dst_view,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
|
||||
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
|
||||
GLuint Handle() const noexcept {
|
||||
return sampler.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
GLuint FetchPBO(std::size_t buffer_size);
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLFramebuffer src_framebuffer;
|
||||
OGLFramebuffer dst_framebuffer;
|
||||
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
|
||||
OGLSampler sampler;
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return framebuffer.handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] GLbitfield BufferBits() const noexcept {
|
||||
return buffer_bits;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLFramebuffer framebuffer;
|
||||
GLbitfield buffer_bits = GL_NONE;
|
||||
};
|
||||
|
||||
struct TextureCacheParams {
|
||||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||
|
||||
using Runtime = OpenGL::TextureCacheRuntime;
|
||||
using Image = OpenGL::Image;
|
||||
using ImageAlloc = OpenGL::ImageAlloc;
|
||||
using ImageView = OpenGL::ImageView;
|
||||
using Sampler = OpenGL::Sampler;
|
||||
using Framebuffer = OpenGL::Framebuffer;
|
||||
};
|
||||
|
||||
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
|
|||
return GL_FILL;
|
||||
}
|
||||
|
||||
inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
|
||||
switch (filter) {
|
||||
case Tegra::Texture::SamplerReduction::WeightedAverage:
|
||||
return GL_WEIGHTED_AVERAGE_ARB;
|
||||
case Tegra::Texture::SamplerReduction::Min:
|
||||
return GL_MIN;
|
||||
case Tegra::Texture::SamplerReduction::Max:
|
||||
return GL_MAX;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
|
||||
return GL_WEIGHTED_AVERAGE_ARB;
|
||||
}
|
||||
|
||||
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
|
||||
// Enumeration order matches register order. We can convert it arithmetically.
|
||||
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
|
||||
|
|
|
@ -23,10 +23,10 @@
|
|||
#include "core/telemetry_session.h"
|
||||
#include "video_core/host_shaders/opengl_present_frag.h"
|
||||
#include "video_core/host_shaders/opengl_present_vert.h"
|
||||
#include "video_core/morton.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
if (!framebuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
PrepareRendertarget(framebuffer);
|
||||
RenderScreenshot();
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
|
||||
state_tracker.BindFramebuffer(0);
|
||||
DrawScreen(emu_window.GetFramebufferLayout());
|
||||
|
||||
++m_current_frame;
|
||||
|
@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
|
|||
// Reset the screen info's display texture to its own permanent texture
|
||||
screen_info.display_texture = screen_info.texture.resource.handle;
|
||||
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
|
||||
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
|
||||
u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
|
||||
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
|
||||
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
|
||||
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
|
||||
gl_framebuffer_data.data(), host_ptr);
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
|
||||
const u64 size_in_bytes{Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
|
||||
const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
|
||||
const std::span<const u8> input_data(host_ptr, size_in_bytes);
|
||||
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
|
||||
framebuffer.width, framebuffer.height, 1, block_height_log2,
|
||||
0);
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
|
||||
|
||||
// Update existing texture
|
||||
|
@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
|
||||
|
||||
// Generate presentation sampler
|
||||
present_sampler.Create();
|
||||
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
|
||||
// Generate VBO handle for drawing
|
||||
vertex_buffer.Create();
|
||||
|
||||
|
@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||
// Clear screen to black
|
||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||
|
||||
// Enable seamless cubemaps when per texture parameters are not available
|
||||
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
|
||||
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
|
||||
}
|
||||
|
||||
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
|
@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
|||
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
|
||||
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
|
||||
|
||||
GLint internal_format;
|
||||
|
@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
|||
internal_format = GL_RGBA8;
|
||||
texture.gl_format = GL_RGBA;
|
||||
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
|
||||
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
|
||||
static_cast<u32>(framebuffer.pixel_format));
|
||||
// UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
|
||||
// static_cast<u32>(framebuffer.pixel_format));
|
||||
}
|
||||
|
||||
texture.resource.Release();
|
||||
|
@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
state_tracker.NotifyPolygonModes();
|
||||
state_tracker.NotifyViewport0();
|
||||
state_tracker.NotifyScissor0();
|
||||
state_tracker.NotifyColorMask0();
|
||||
state_tracker.NotifyColorMask(0);
|
||||
state_tracker.NotifyBlend0();
|
||||
state_tracker.NotifyFramebuffer();
|
||||
state_tracker.NotifyFrontFace();
|
||||
|
@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
}
|
||||
|
||||
glBindTextureUnit(0, screen_info.display_texture);
|
||||
glBindSampler(0, 0);
|
||||
glBindSampler(0, present_sampler.handle);
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
|
@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
|
|||
|
||||
DrawScreen(layout);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
renderer_settings.screenshot_bits);
|
||||
|
||||
|
|
|
@ -102,6 +102,7 @@ private:
|
|||
StateTracker state_tracker{gpu};
|
||||
|
||||
// OpenGL object IDs
|
||||
OGLSampler present_sampler;
|
||||
OGLBuffer vertex_buffer;
|
||||
OGLProgram vertex_program;
|
||||
OGLProgram fragment_program;
|
||||
|
|
224
src/video_core/renderer_opengl/util_shaders.cpp
Normal file
224
src/video_core/renderer_opengl/util_shaders.cpp
Normal file
|
@ -0,0 +1,224 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bit>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using namespace HostShaders;
|
||||
|
||||
using VideoCommon::Extent3D;
|
||||
using VideoCommon::ImageCopy;
|
||||
using VideoCommon::ImageType;
|
||||
using VideoCommon::SwizzleParameters;
|
||||
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
|
||||
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
|
||||
namespace {
|
||||
|
||||
OGLProgram MakeProgram(std::string_view source) {
|
||||
OGLShader shader;
|
||||
shader.Create(source, GL_COMPUTE_SHADER);
|
||||
|
||||
OGLProgram program;
|
||||
program.Create(true, false, shader.handle);
|
||||
return program;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||
: program_manager{program_manager_},
|
||||
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
|
||||
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
||||
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
||||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||
swizzle_table_buffer.Create();
|
||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||
}
|
||||
|
||||
UtilShaders::~UtilShaders() = default;
|
||||
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
|
||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||
glUniform3uiv(0, 1, params.origin.data());
|
||||
glUniform3iv(1, 1, params.destination.data());
|
||||
glUniform1ui(2, params.bytes_per_block_log2);
|
||||
glUniform1ui(3, params.layer_stride);
|
||||
glUniform1ui(4, params.block_size);
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
|
||||
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
|
||||
|
||||
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
|
||||
glUniform3uiv(0, 1, params.origin.data());
|
||||
glUniform3iv(1, 1, params.destination.data());
|
||||
glUniform1ui(2, params.bytes_per_block_log2);
|
||||
glUniform1ui(3, params.slice_size);
|
||||
glUniform1ui(4, params.block_size);
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glUniform1ui(8, params.block_depth);
|
||||
glUniform1ui(9, params.block_depth_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
static constexpr GLuint LOC_ORIGIN = 0;
|
||||
static constexpr GLuint LOC_DESTINATION = 1;
|
||||
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
|
||||
static constexpr GLuint LOC_PITCH = 3;
|
||||
|
||||
const u32 bytes_per_block = BytesPerBlock(image.info.format);
|
||||
const GLenum format = StoreFormat(bytes_per_block);
|
||||
const u32 pitch = image.info.pitch;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
|
||||
"Non-power of two images are not implemented");
|
||||
|
||||
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glUniform2ui(LOC_ORIGIN, 0, 0);
|
||||
glUniform2i(LOC_DESTINATION, 0, 0);
|
||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
|
||||
glUniform1ui(LOC_PITCH, pitch);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
|
||||
static constexpr GLuint BINDING_INPUT_IMAGE = 0;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
|
||||
static constexpr GLuint LOC_SRC_OFFSET = 0;
|
||||
static constexpr GLuint LOC_DST_OFFSET = 1;
|
||||
|
||||
program_manager.BindHostCompute(copy_bc4_program.handle);
|
||||
|
||||
for (const ImageCopy& copy : copies) {
|
||||
ASSERT(copy.src_subresource.base_layer == 0);
|
||||
ASSERT(copy.src_subresource.num_layers == 1);
|
||||
ASSERT(copy.dst_subresource.base_layer == 0);
|
||||
ASSERT(copy.dst_subresource.num_layers == 1);
|
||||
|
||||
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
|
||||
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
|
||||
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
|
||||
GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
|
||||
copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
|
||||
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
GLenum StoreFormat(u32 bytes_per_block) {
|
||||
switch (bytes_per_block) {
|
||||
case 1:
|
||||
return GL_R8UI;
|
||||
case 2:
|
||||
return GL_R16UI;
|
||||
case 4:
|
||||
return GL_R32UI;
|
||||
case 8:
|
||||
return GL_RG32UI;
|
||||
case 16:
|
||||
return GL_RGBA32UI;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return GL_R8UI;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
51
src/video_core/renderer_opengl/util_shaders.h
Normal file
51
src/video_core/renderer_opengl/util_shaders.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Image;
|
||||
class ImageBufferMap;
|
||||
class ProgramManager;
|
||||
|
||||
class UtilShaders {
|
||||
public:
|
||||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
~UtilShaders();
|
||||
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void CopyBC4(Image& dst_image, Image& src_image,
|
||||
std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
private:
|
||||
ProgramManager& program_manager;
|
||||
|
||||
OGLBuffer swizzle_table_buffer;
|
||||
|
||||
OGLProgram block_linear_unswizzle_2d_program;
|
||||
OGLProgram block_linear_unswizzle_3d_program;
|
||||
OGLProgram pitch_unswizzle_program;
|
||||
OGLProgram copy_bc4_program;
|
||||
};
|
||||
|
||||
GLenum StoreFormat(u32 bytes_per_block);
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,42 +0,0 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
|
||||
if (!GLAD_GL_KHR_debug) {
|
||||
// We don't need to throw an error as this is just for debugging
|
||||
return;
|
||||
}
|
||||
|
||||
std::string object_label;
|
||||
if (extra_info.empty()) {
|
||||
switch (identifier) {
|
||||
case GL_TEXTURE:
|
||||
object_label = fmt::format("Texture@0x{:016X}", addr);
|
||||
break;
|
||||
case GL_PROGRAM:
|
||||
object_label = fmt::format("Shader@0x{:016X}", addr);
|
||||
break;
|
||||
default:
|
||||
object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
|
||||
}
|
||||
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,16 +0,0 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
|
||||
|
||||
} // namespace OpenGL
|
Loading…
Add table
Add a link
Reference in a new issue