commit 98b26b6e12
428 changed files with 49376 additions and 27255 deletions
File diff suppressed because it is too large
@@ -1,29 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <string>
#include <string_view>

#include "common/common_types.h"

namespace Tegra::Engines {
enum class ShaderType : u32;
}

namespace VideoCommon::Shader {
class ShaderIR;
class Registry;
} // namespace VideoCommon::Shader

namespace OpenGL {

class Device;

std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier);

} // namespace OpenGL
@@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <span>

#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"

namespace OpenGL {
namespace {
using VideoCore::Surface::PixelFormat;

struct BindlessSSBO {
    GLuint64EXT address;
    GLsizei length;
@@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
    GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};

[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
    switch (gl_format) {
    case GL_RGBA8_SNORM:
        return GL_RGBA8;
    case GL_R8_SNORM:
        return GL_R8;
    case GL_RGBA16_SNORM:
        return GL_RGBA16;
    case GL_R16_SNORM:
        return GL_R16;
    case GL_RG16_SNORM:
        return GL_RG16;
    case GL_RG8_SNORM:
        return GL_RG8;
    default:
        return gl_format;
    }
}
} // Anonymous namespace

Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
    glMakeNamedBufferResidentNV(buffer.handle, access);
}

GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
    const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
        return offset == view.offset && size == view.size && format == view.format;
    })};
    if (it != views.end()) {
        return it->texture.handle;
    }
    OGLTexture texture;
    texture.Create(GL_TEXTURE_BUFFER);
    const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
    const GLenum texture_format{GetTextureBufferFormat(gl_format)};
    if (texture_format != gl_format) {
        LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
    }
    glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
    views.push_back({
        .offset = offset,
        .size = size,
        .format = format,
        .texture = std::move(texture),
    });
    return views.back().texture.handle;
}

BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
    : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
      use_assembly_shaders{device.UseAssemblyShaders()},
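Buffer::View above is a small find-or-create cache: a repeated request with the same (offset, size, format) triple returns the previously created texture-buffer view instead of building a new one. A minimal standalone sketch of the same lookup-then-append pattern, using hypothetical CachedView/FindOrCreateView names rather than the yuzu classes:

#include <algorithm>
#include <cstdint>
#include <vector>

struct CachedView {
    std::uint32_t offset;
    std::uint32_t size;
    int format;
    int handle; // stands in for the GL texture handle
};

// Return an existing view handle when the parameters match, otherwise create one.
int FindOrCreateView(std::vector<CachedView>& views, std::uint32_t offset,
                     std::uint32_t size, int format) {
    const auto it = std::find_if(views.begin(), views.end(), [&](const CachedView& v) {
        return v.offset == offset && v.size == size && v.format == format;
    });
    if (it != views.end()) {
        return it->handle;
    }
    const int handle = static_cast<int>(views.size()) + 1; // placeholder for glCreateTextures
    views.push_back({offset, size, format, handle});
    return views.back().handle;
}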
@@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
        glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
                            static_cast<GLsizeiptr>(size));
    } else {
        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
        const GLuint base_binding = graphics_base_uniform_bindings[stage];
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf

void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
                                           u32 offset, u32 size, bool is_written) {
    if (use_assembly_shaders) {
    if (use_storage_buffers) {
        const GLuint base_binding = graphics_base_storage_bindings[stage];
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    } else {
        const BindlessSSBO ssbo{
            .address = buffer.HostGpuAddr() + offset,
            .length = static_cast<GLsizei>(size),
@@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
        glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
                                        reinterpret_cast<const GLuint*>(&ssbo));
    } else {
        const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }
}

void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                  u32 size, bool is_written) {
    if (use_assembly_shaders) {
    if (use_storage_buffers) {
        if (size != 0) {
            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
                              static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
        } else {
            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
        }
    } else {
        const BindlessSSBO ssbo{
            .address = buffer.HostGpuAddr() + offset,
            .length = static_cast<GLsizei>(size),
@@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
        glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
                                        reinterpret_cast<const GLuint*>(&ssbo));
    } else if (size == 0) {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
    } else {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }
}

@@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
                      static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}

void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
                                           PixelFormat format) {
    *texture_handles++ = buffer.View(offset, size, format);
}

void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
    *image_handles++ = buffer.View(offset, size, format);
}

} // namespace OpenGL
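BindTextureBuffer and BindImageBuffer do not bind anything themselves: they write the view handle through the texture_handles/image_handles cursors installed by SetImagePointers, and the pipeline later submits the filled arrays with glBindTextures/glBindImageTextures. A small sketch of that cursor-into-caller-array idea, with plain ints standing in for GL handles (illustrative only, not the yuzu types):

#include <array>
#include <cstddef>

struct Runtime {
    int* texture_cursor = nullptr;
    void SetImagePointers(int* textures) { texture_cursor = textures; }
    void BindTextureBuffer(int handle) { *texture_cursor++ = handle; } // advance caller's array
};

int main() {
    std::array<int, 4> textures{};
    Runtime runtime;
    runtime.SetImagePointers(textures.data());
    runtime.BindTextureBuffer(7);
    runtime.BindTextureBuffer(9);
    const std::size_t bound = 2; // caller tracks how many slots were filled
    // ...a glBindTextures(0, bound, textures.data()) call would follow in the real code.
    return (textures[0] == 7 && textures[1] == 9 && bound == 2) ? 0 : 1;
}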
@@ -32,6 +32,8 @@ public:

    void MakeResident(GLenum access) noexcept;

    [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);

    [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
        return address;
    }
@@ -41,9 +43,17 @@ public:
    }

private:
    struct BufferView {
        u32 offset;
        u32 size;
        VideoCore::Surface::PixelFormat format;
        OGLTexture texture;
    };

    GLuint64EXT address = 0;
    OGLBuffer buffer;
    GLenum current_residency_access = GL_NONE;
    std::vector<BufferView> views;
};

class BufferCacheRuntime {
@@ -75,17 +85,21 @@ public:

    void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);

    void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
                           VideoCore::Surface::PixelFormat format);

    void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
                         VideoCore::Surface::PixelFormat format);

    void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
        const GLuint handle = fast_uniforms[stage][binding_index].handle;
        const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
        if (use_assembly_shaders) {
            const GLuint handle = fast_uniforms[stage][binding_index].handle;
            const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
            glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
        } else {
            const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
            const GLuint base_binding = graphics_base_uniform_bindings[stage];
            const GLuint binding = base_binding + binding_index;
            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
                              fast_uniforms[stage][binding_index].handle, 0,
                              static_cast<GLsizeiptr>(size));
            glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
        }
    }

@@ -103,7 +117,7 @@ public:

    std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
        const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
        const GLuint base_binding = graphics_base_uniform_bindings[stage];
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -118,6 +132,27 @@ public:
        return has_fast_buffer_sub_data;
    }

    [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
        return !use_assembly_shaders;
    }

    void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
        graphics_base_uniform_bindings = bindings;
    }

    void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
        graphics_base_storage_bindings = bindings;
    }

    void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
        texture_handles = texture_handles_;
        image_handles = image_handles_;
    }

    void SetEnableStorageBuffers(bool use_storage_buffers_) {
        use_storage_buffers = use_storage_buffers_;
    }

private:
    static constexpr std::array PABO_LUT{
        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -131,8 +166,15 @@ private:
    bool use_assembly_shaders = false;
    bool has_unified_vertex_buffers = false;

    bool use_storage_buffers = false;

    u32 max_attributes = 0;

    std::array<GLuint, 5> graphics_base_uniform_bindings{};
    std::array<GLuint, 5> graphics_base_storage_bindings{};
    GLuint* texture_handles = nullptr;
    GLuint* image_handles = nullptr;

    std::optional<StreamBuffer> stream_buffer;

    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -156,6 +198,7 @@ struct BufferCacheParams {
    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
    static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
    static constexpr bool USE_MEMORY_MAPS = false;
    static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};

using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
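BufferCacheParams is a compile-time policy: the shared VideoCommon::BufferCache template reads these constants (USE_MEMORY_MAPS, the bind-index flags, and so on) and selects backend-specific code paths with if constexpr. A generic sketch of that policy-template technique, using a hypothetical Params/Cache pair rather than the actual VideoCommon implementation:

#include <cstdio>

struct GLParams {
    static constexpr bool USE_MEMORY_MAPS = false;
};

template <class P>
class Cache {
public:
    void Upload() {
        // The branch not taken is discarded at compile time, so each backend
        // only instantiates the path its parameters enable.
        if constexpr (P::USE_MEMORY_MAPS) {
            std::puts("upload through a persistently mapped staging buffer");
        } else {
            std::puts("upload with buffer-subdata style copies");
        }
    }
};

int main() {
    Cache<GLParams> cache;
    cache.Upload();
}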
src/video_core/renderer_opengl/gl_compute_pipeline.cpp (new file, 209 lines)
@@ -0,0 +1,209 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/cityhash.h"
|
||||
#include "common/settings.h" // for enum class Settings::ShaderBackend
|
||||
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
using VideoCommon::ImageId;
|
||||
|
||||
constexpr u32 MAX_TEXTURES = 64;
|
||||
constexpr u32 MAX_IMAGES = 16;
|
||||
|
||||
template <typename Range>
|
||||
u32 AccumulateCount(const Range& range) {
|
||||
u32 num{};
|
||||
for (const auto& desc : range) {
|
||||
num += desc.count;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
size_t ComputePipelineKey::Hash() const noexcept {
|
||||
return static_cast<size_t>(
|
||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
|
||||
}
|
||||
|
||||
bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
|
||||
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
|
||||
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
ProgramManager& program_manager_, const Shader::Info& info_,
|
||||
std::string code, std::vector<u32> code_v)
|
||||
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
|
||||
kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
|
||||
switch (device.GetShaderBackend()) {
|
||||
case Settings::ShaderBackend::GLSL:
|
||||
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
|
||||
break;
|
||||
case Settings::ShaderBackend::GLASM:
|
||||
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
|
||||
break;
|
||||
case Settings::ShaderBackend::SPIRV:
|
||||
source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
|
||||
break;
|
||||
}
|
||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||
uniform_buffer_sizes.begin());
|
||||
|
||||
num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
|
||||
num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
|
||||
|
||||
const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
|
||||
ASSERT(num_textures <= MAX_TEXTURES);
|
||||
|
||||
const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
|
||||
ASSERT(num_images <= MAX_IMAGES);
|
||||
|
||||
const bool is_glasm{assembly_program.handle != 0};
|
||||
const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
|
||||
use_storage_buffers =
|
||||
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
|
||||
writes_global_memory = !use_storage_buffers &&
|
||||
std::ranges::any_of(info.storage_buffers_descriptors,
|
||||
[](const auto& desc) { return desc.is_written; });
|
||||
}
|
||||
|
||||
void ComputePipeline::Configure() {
|
||||
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
size_t ssbo_index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
|
||||
desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
|
||||
boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
|
||||
std::array<GLuint, MAX_TEXTURES> samplers;
|
||||
std::array<GLuint, MAX_TEXTURES> textures;
|
||||
std::array<GLuint, MAX_IMAGES> images;
|
||||
GLsizei sampler_binding{};
|
||||
GLsizei texture_binding{};
|
||||
GLsizei image_binding{};
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& cbufs{qmd.const_buffer_config};
|
||||
const bool via_header_index{qmd.linked_tsc != 0};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
const u32 offset{desc.cbuf_offset + index_offset};
|
||||
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
|
||||
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
|
||||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
|
||||
if (desc.has_secondary) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
|
||||
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
|
||||
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
|
||||
secondary_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
|
||||
return TexturePair(lhs_raw | rhs_raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
}
|
||||
}};
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
samplers[sampler_binding++] = 0;
|
||||
}
|
||||
}
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_image);
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||
samplers[sampler_binding++] = sampler->Handle();
|
||||
}
|
||||
}
|
||||
std::ranges::for_each(info.image_descriptors, add_image);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
if (assembly_program.handle != 0) {
|
||||
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
|
||||
} else {
|
||||
program_manager.BindComputeProgram(source_program.handle);
|
||||
}
|
||||
buffer_cache.UnbindComputeTextureBuffers();
|
||||
size_t texbuf_index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
|
||||
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written, is_image);
|
||||
++texbuf_index;
|
||||
}
|
||||
}};
|
||||
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
|
||||
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
|
||||
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
|
||||
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
|
||||
texture_binding += num_texture_buffers;
|
||||
image_binding += num_image_buffers;
|
||||
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
||||
textures[texture_binding++] = image_view.Handle(desc.type);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
||||
if (desc.is_written) {
|
||||
texture_cache.MarkModification(image_view.image_id);
|
||||
}
|
||||
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
|
||||
}
|
||||
}
|
||||
if (texture_binding != 0) {
|
||||
ASSERT(texture_binding == sampler_binding);
|
||||
glBindTextures(0, texture_binding, textures.data());
|
||||
glBindSamplers(0, sampler_binding, samplers.data());
|
||||
}
|
||||
if (image_binding != 0) {
|
||||
glBindImageTextures(0, image_binding, images.data());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
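Configure() above relies on a fixed layout for the handle arrays: texture-buffer views occupy the first num_texture_buffers slots and sampled textures follow, so a single glBindTextures call can submit both. A small standalone sketch of that offset accounting, using a hypothetical Descriptor struct instead of the Shader::Info descriptor types:

#include <cstdint>
#include <vector>

struct Descriptor {
    std::uint32_t count;
};

std::uint32_t AccumulateCount(const std::vector<Descriptor>& descs) {
    std::uint32_t num = 0;
    for (const Descriptor& d : descs) {
        num += d.count;
    }
    return num;
}

int main() {
    const std::vector<Descriptor> texture_buffers{{1}, {2}}; // 3 buffer views
    const std::vector<Descriptor> textures{{4}};             // 4 sampled textures
    const std::uint32_t num_texture_buffers = AccumulateCount(texture_buffers);
    // Regular textures start right after the buffer views in the bound array.
    const std::uint32_t first_texture_slot = num_texture_buffers;
    const std::uint32_t total = num_texture_buffers + AccumulateCount(textures);
    return (first_texture_slot == 3 && total == 7) ? 0 : 1;
}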
src/video_core/renderer_opengl/gl_compute_pipeline.h (new file, 93 lines)
@@ -0,0 +1,93 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <type_traits>
#include <utility>

#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"

namespace Tegra {
class MemoryManager;
}

namespace Tegra::Engines {
class KeplerCompute;
}

namespace Shader {
struct Info;
}

namespace OpenGL {

class Device;
class ProgramManager;

struct ComputePipelineKey {
    u64 unique_hash;
    u32 shared_memory_size;
    std::array<u32, 3> workgroup_size;

    size_t Hash() const noexcept;

    bool operator==(const ComputePipelineKey&) const noexcept;

    bool operator!=(const ComputePipelineKey& rhs) const noexcept {
        return !operator==(rhs);
    }
};
static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);

class ComputePipeline {
public:
    explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
                             BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                             Tegra::Engines::KeplerCompute& kepler_compute_,
                             ProgramManager& program_manager_, const Shader::Info& info_,
                             std::string code, std::vector<u32> code_v);

    void Configure();

    [[nodiscard]] bool WritesGlobalMemory() const noexcept {
        return writes_global_memory;
    }

private:
    TextureCache& texture_cache;
    BufferCache& buffer_cache;
    Tegra::MemoryManager& gpu_memory;
    Tegra::Engines::KeplerCompute& kepler_compute;
    ProgramManager& program_manager;

    Shader::Info info;
    OGLProgram source_program;
    OGLAssemblyProgram assembly_program;
    VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};

    u32 num_texture_buffers{};
    u32 num_image_buffers{};

    bool use_storage_buffers{};
    bool writes_global_memory{};
};

} // namespace OpenGL

namespace std {
template <>
struct hash<OpenGL::ComputePipelineKey> {
    size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std
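The std::hash specialization above is what lets ComputePipelineKey act directly as an unordered-map key. A cache keyed on it would look roughly like the sketch below; the map alias, Lookup helper, and ownership choice are illustrative, not the shader cache added elsewhere in this commit:

#include <memory>
#include <unordered_map>

#include "video_core/renderer_opengl/gl_compute_pipeline.h"

// Hypothetical cache: ComputePipelineKey hashes and compares by value, so no
// extra hasher or equality functor is needed.
using PipelineMap =
    std::unordered_map<OpenGL::ComputePipelineKey, std::unique_ptr<OpenGL::ComputePipeline>>;

OpenGL::ComputePipeline* Lookup(PipelineMap& map, const OpenGL::ComputePipelineKey& key) {
    const auto it = map.find(key);
    return it != map.end() ? it->second.get() : nullptr;
}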
|
@@ -17,39 +17,17 @@
|
|||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
// One uniform block is reserved for emulation purposes
|
||||
constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LIMIT_UBOS = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SSBOS = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SAMPLERS = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
|
||||
};
|
||||
constexpr std::array LIMIT_IMAGES = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
|
@@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_vie
|
|||
return std::ranges::find(extensions, extension) != extensions.end();
|
||||
}
|
||||
|
||||
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||
ASSERT(num >= amount);
|
||||
if (limit) {
|
||||
amount = std::min(amount, GetInteger<u32>(*limit));
|
||||
}
|
||||
num -= amount;
|
||||
return std::exchange(base, base + amount);
|
||||
}
|
||||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::ranges::transform(LIMIT_UBOS, max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Shader::MaxStageTypes> max;
|
||||
std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
|
||||
return max;
|
||||
}
|
||||
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
||||
|
||||
static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
|
||||
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
|
||||
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
|
||||
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
|
||||
|
||||
u32 num_ubos = total_ubos - ReservedUniformBlocks;
|
||||
u32 num_ssbos = total_ssbos;
|
||||
u32 num_samplers = total_samplers;
|
||||
|
||||
u32 base_ubo = ReservedUniformBlocks;
|
||||
u32 base_ssbo = 0;
|
||||
u32 base_samplers = 0;
|
||||
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
bindings[stage] = {
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages,
|
||||
LIMIT_SAMPLERS[stage])};
|
||||
}
|
||||
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
u32 base_images = 0;
|
||||
|
||||
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
|
||||
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
|
||||
// fragment stage, and at least 1 for the rest of the stages.
|
||||
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
|
||||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
||||
// Reserve the other image bindings.
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
if (stage == 4) {
|
||||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
|
||||
}
|
||||
|
||||
// Compute doesn't care about any of this.
|
||||
bindings[5] = {0, 0, 0, 0};
|
||||
|
||||
return bindings;
|
||||
}
|
||||
|
||||
bool IsASTCSupported() {
|
||||
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
|
||||
static constexpr std::array formats = {
|
||||
static constexpr std::array targets{
|
||||
GL_TEXTURE_2D,
|
||||
GL_TEXTURE_2D_ARRAY,
|
||||
};
|
||||
static constexpr std::array formats{
|
||||
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
|
||||
|
@@ -172,11 +87,10 @@ bool IsASTCSupported() {
|
|||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
|
||||
};
|
||||
static constexpr std::array required_support = {
|
||||
static constexpr std::array required_support{
|
||||
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
|
||||
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
|
||||
};
|
||||
|
||||
for (const GLenum target : targets) {
|
||||
for (const GLenum format : formats) {
|
||||
for (const GLenum support : required_support) {
|
||||
|
@@ -223,14 +137,13 @@ Device::Device() {
|
|||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||
disable_fast_buffer_sub_data = true;
|
||||
}
|
||||
|
||||
max_uniform_buffers = BuildMaxUniformBuffers();
|
||||
base_bindings = BuildBaseBindings();
|
||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
|
||||
max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
|
||||
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
|
||||
GLAD_GL_NV_shader_thread_shuffle;
|
||||
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
|
||||
|
@@ -243,18 +156,30 @@ Device::Device() {
|
|||
has_precise_bug = TestPreciseBug();
|
||||
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
has_derivative_control = GLAD_GL_ARB_derivative_control;
|
||||
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
|
||||
has_debugging_tool_attached = IsDebugToolAttached(extensions);
|
||||
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
|
||||
has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
|
||||
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
|
||||
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
|
||||
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
|
||||
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
|
||||
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
|
||||
need_fastmath_off = is_nvidia;
|
||||
|
||||
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||
// uniform buffers as "push constants"
|
||||
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
||||
|
||||
use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
|
||||
shader_backend = Settings::values.shader_backend.GetValue();
|
||||
use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
|
||||
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
|
||||
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
|
||||
|
||||
if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
|
||||
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
|
||||
shader_backend = Settings::ShaderBackend::GLSL;
|
||||
}
|
||||
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
|
||||
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
|
||||
!(is_amd || (is_intel && !is_linux));
|
||||
|
@@ -265,11 +190,6 @@ Device::Device() {
|
|||
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
|
||||
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
|
||||
has_broken_texture_view_formats);
|
||||
|
||||
if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
|
||||
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
|
||||
}
|
||||
|
||||
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
|
||||
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
|
||||
}
|
||||
|
@@ -325,22 +245,6 @@ std::string Device::GetVendorName() const {
|
|||
return vendor_name;
|
||||
}
|
||||
|
||||
Device::Device(std::nullptr_t) {
|
||||
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
|
||||
uniform_buffer_alignment = 4;
|
||||
shader_storage_alignment = 4;
|
||||
max_vertex_attributes = 16;
|
||||
max_varyings = 15;
|
||||
max_compute_shared_memory_size = 0x10000;
|
||||
has_warp_intrinsics = true;
|
||||
has_shader_ballot = true;
|
||||
has_vertex_viewport_layer = true;
|
||||
has_image_load_formatted = true;
|
||||
has_texture_shadow_lod = true;
|
||||
has_variable_aoffi = true;
|
||||
has_depth_buffer_float = true;
|
||||
}
|
||||
|
||||
bool Device::TestVariableAoffi() {
|
||||
return TestProgram(R"(#version 430 core
|
||||
// This is a unit test, please ignore me on apitrace bug reports.
|
||||
|
|
|
@@ -6,34 +6,22 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
|
||||
namespace Settings {
|
||||
enum class ShaderBackend : u32;
|
||||
};
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device {
|
||||
public:
|
||||
struct BaseBindings {
|
||||
u32 uniform_buffer{};
|
||||
u32 shader_storage_buffer{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
};
|
||||
|
||||
explicit Device();
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
[[nodiscard]] std::string GetVendorName() const;
|
||||
|
||||
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
||||
return base_bindings[stage_index];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return GetBaseBindings(static_cast<std::size_t>(shader_type));
|
||||
u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
|
||||
return max_uniform_buffers[static_cast<size_t>(stage)];
|
||||
}
|
||||
|
||||
size_t GetUniformBufferAlignment() const {
|
||||
|
@@ -56,6 +44,10 @@ public:
|
|||
return max_compute_shared_memory_size;
|
||||
}
|
||||
|
||||
u32 GetMaxGLASMStorageBufferBlocks() const {
|
||||
return max_glasm_storage_buffer_blocks;
|
||||
}
|
||||
|
||||
bool HasWarpIntrinsics() const {
|
||||
return has_warp_intrinsics;
|
||||
}
|
||||
|
@@ -108,6 +100,10 @@ public:
|
|||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
bool HasDerivativeControl() const {
|
||||
return has_derivative_control;
|
||||
}
|
||||
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_debugging_tool_attached;
|
||||
}
|
||||
|
@@ -128,18 +124,52 @@ public:
|
|||
return has_depth_buffer_float;
|
||||
}
|
||||
|
||||
bool HasGeometryShaderPassthrough() const {
|
||||
return has_geometry_shader_passthrough;
|
||||
}
|
||||
|
||||
bool HasNvGpuShader5() const {
|
||||
return has_nv_gpu_shader_5;
|
||||
}
|
||||
|
||||
bool HasShaderInt64() const {
|
||||
return has_shader_int64;
|
||||
}
|
||||
|
||||
bool HasAmdShaderHalfFloat() const {
|
||||
return has_amd_shader_half_float;
|
||||
}
|
||||
|
||||
bool HasSparseTexture2() const {
|
||||
return has_sparse_texture_2;
|
||||
}
|
||||
|
||||
bool IsWarpSizePotentiallyLargerThanGuest() const {
|
||||
return warp_size_potentially_larger_than_guest;
|
||||
}
|
||||
|
||||
bool NeedsFastmathOff() const {
|
||||
return need_fastmath_off;
|
||||
}
|
||||
|
||||
Settings::ShaderBackend GetShaderBackend() const {
|
||||
return shader_backend;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestPreciseBug();
|
||||
|
||||
std::string vendor_name;
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
|
||||
size_t uniform_buffer_alignment{};
|
||||
size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
u32 max_compute_shared_memory_size{};
|
||||
u32 max_glasm_storage_buffer_blocks{};
|
||||
|
||||
Settings::ShaderBackend shader_backend{};
|
||||
|
||||
bool has_warp_intrinsics{};
|
||||
bool has_shader_ballot{};
|
||||
bool has_vertex_viewport_layer{};
|
||||
|
@@ -153,11 +183,21 @@ private:
|
|||
bool has_broken_texture_view_formats{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool has_derivative_control{};
|
||||
bool has_debugging_tool_attached{};
|
||||
bool use_assembly_shaders{};
|
||||
bool use_asynchronous_shaders{};
|
||||
bool use_driver_cache{};
|
||||
bool has_depth_buffer_float{};
|
||||
bool has_geometry_shader_passthrough{};
|
||||
bool has_nv_gpu_shader_5{};
|
||||
bool has_shader_int64{};
|
||||
bool has_amd_shader_half_float{};
|
||||
bool has_sparse_texture_2{};
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
bool need_fastmath_off{};
|
||||
|
||||
std::string vendor_name;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
src/video_core/renderer_opengl/gl_graphics_pipeline.cpp (new file, 572 lines)
@@ -0,0 +1,572 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/settings.h" // for enum class Settings::ShaderBackend
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
#if defined(_MSC_VER) && defined(NDEBUG)
|
||||
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
|
||||
#else
|
||||
#define LAMBDA_FORCEINLINE
|
||||
#endif
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Shader::ImageDescriptor;
|
||||
using Shader::TextureBufferDescriptor;
|
||||
using Shader::TextureDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
using VideoCommon::ImageId;
|
||||
|
||||
constexpr u32 MAX_TEXTURES = 64;
|
||||
constexpr u32 MAX_IMAGES = 8;
|
||||
|
||||
template <typename Range>
|
||||
u32 AccumulateCount(const Range& range) {
|
||||
u32 num{};
|
||||
for (const auto& desc : range) {
|
||||
num += desc.count;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
GLenum Stage(size_t stage_index) {
|
||||
switch (stage_index) {
|
||||
case 0:
|
||||
return GL_VERTEX_SHADER;
|
||||
case 1:
|
||||
return GL_TESS_CONTROL_SHADER;
|
||||
case 2:
|
||||
return GL_TESS_EVALUATION_SHADER;
|
||||
case 3:
|
||||
return GL_GEOMETRY_SHADER;
|
||||
case 4:
|
||||
return GL_FRAGMENT_SHADER;
|
||||
}
|
||||
UNREACHABLE_MSG("{}", stage_index);
|
||||
return GL_NONE;
|
||||
}
|
||||
|
||||
GLenum AssemblyStage(size_t stage_index) {
|
||||
switch (stage_index) {
|
||||
case 0:
|
||||
return GL_VERTEX_PROGRAM_NV;
|
||||
case 1:
|
||||
return GL_TESS_CONTROL_PROGRAM_NV;
|
||||
case 2:
|
||||
return GL_TESS_EVALUATION_PROGRAM_NV;
|
||||
case 3:
|
||||
return GL_GEOMETRY_PROGRAM_NV;
|
||||
case 4:
|
||||
return GL_FRAGMENT_PROGRAM_NV;
|
||||
}
|
||||
UNREACHABLE_MSG("{}", stage_index);
|
||||
return GL_NONE;
|
||||
}
|
||||
|
||||
/// Translates hardware transform feedback indices
|
||||
/// @param location Hardware location
|
||||
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
|
||||
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
|
||||
const u8 index = location / 4;
|
||||
if (index >= 8 && index <= 39) {
|
||||
return {GL_GENERIC_ATTRIB_NV, index - 8};
|
||||
}
|
||||
if (index >= 48 && index <= 55) {
|
||||
return {GL_TEXTURE_COORD_NV, index - 48};
|
||||
}
|
||||
switch (index) {
|
||||
case 7:
|
||||
return {GL_POSITION, 0};
|
||||
case 40:
|
||||
return {GL_PRIMARY_COLOR_NV, 0};
|
||||
case 41:
|
||||
return {GL_SECONDARY_COLOR_NV, 0};
|
||||
case 42:
|
||||
return {GL_BACK_PRIMARY_COLOR_NV, 0};
|
||||
case 43:
|
||||
return {GL_BACK_SECONDARY_COLOR_NV, 0};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("index={}", index);
|
||||
return {GL_POSITION, 0};
|
||||
}
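// Added commentary (not part of the original diff): worked example of the
// mapping above. Hardware location 32 gives index 32 / 4 = 8, the first
// generic attribute, so TransformFeedbackEnum(32) yields
// {GL_GENERIC_ATTRIB_NV, 0}. Location 196 gives index 49, inside the
// [48, 55] range, so it yields {GL_TEXTURE_COORD_NV, 1}. Location 28 gives
// index 7 and maps to {GL_POSITION, 0}.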
|
||||
|
||||
template <typename Spec>
|
||||
bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
|
||||
for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
|
||||
if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
|
||||
return false;
|
||||
}
|
||||
const auto& info{stage_infos[stage]};
|
||||
if constexpr (!Spec::has_storage_buffers) {
|
||||
if (!info.storage_buffers_descriptors.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if constexpr (!Spec::has_texture_buffers) {
|
||||
if (!info.texture_buffer_descriptors.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if constexpr (!Spec::has_image_buffers) {
|
||||
if (!info.image_buffer_descriptors.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if constexpr (!Spec::has_images) {
|
||||
if (!info.image_descriptors.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
|
||||
|
||||
template <typename Spec, typename... Specs>
|
||||
ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
|
||||
if constexpr (sizeof...(Specs) > 0) {
|
||||
if (!Passes<Spec>(stage_infos, enabled_mask)) {
|
||||
return FindSpec<Specs...>(stage_infos, enabled_mask);
|
||||
}
|
||||
}
|
||||
return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
|
||||
}
|
||||
|
||||
struct SimpleVertexFragmentSpec {
|
||||
static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
|
||||
static constexpr bool has_storage_buffers = false;
|
||||
static constexpr bool has_texture_buffers = false;
|
||||
static constexpr bool has_image_buffers = false;
|
||||
static constexpr bool has_images = false;
|
||||
};
|
||||
|
||||
struct SimpleVertexSpec {
|
||||
static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
|
||||
static constexpr bool has_storage_buffers = false;
|
||||
static constexpr bool has_texture_buffers = false;
|
||||
static constexpr bool has_image_buffers = false;
|
||||
static constexpr bool has_images = false;
|
||||
};
|
||||
|
||||
struct DefaultSpec {
|
||||
static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
|
||||
static constexpr bool has_storage_buffers = true;
|
||||
static constexpr bool has_texture_buffers = true;
|
||||
static constexpr bool has_image_buffers = true;
|
||||
static constexpr bool has_images = true;
|
||||
};
|
||||
|
||||
ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
|
||||
return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
|
||||
}
|
||||
} // Anonymous namespace
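// Note on the dispatch above (added commentary, not part of the original
// source): FindSpec walks the Spec list and returns the configure function
// for the first specialization whose restrictions hold for this pipeline, so
// ConfigureFunc(infos, mask) picks SimpleVertexSpec for a vertex-only
// pipeline with no storage/texture/image buffers or images,
// SimpleVertexFragmentSpec for a plain vertex+fragment pipeline, and falls
// back to DefaultSpec otherwise. Each specialization instantiates its own
// ConfigureImpl<Spec>, dropping the if constexpr branches it does not need.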
|
||||
|
||||
GraphicsPipeline::GraphicsPipeline(
|
||||
const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
|
||||
VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
|
||||
std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
|
||||
const GraphicsPipelineKey& key_)
|
||||
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
|
||||
gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
|
||||
state_tracker{state_tracker_}, key{key_} {
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderBuilding();
|
||||
}
|
||||
u32 num_textures{};
|
||||
u32 num_images{};
|
||||
u32 num_storage_buffers{};
|
||||
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
|
||||
auto& info{stage_infos[stage]};
|
||||
if (infos[stage]) {
|
||||
info = *infos[stage];
|
||||
enabled_stages_mask |= 1u << stage;
|
||||
}
|
||||
if (stage < 4) {
|
||||
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
|
||||
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
|
||||
|
||||
base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
|
||||
base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
|
||||
}
|
||||
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
|
||||
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||
|
||||
const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
|
||||
num_texture_buffers[stage] += num_tex_buffer_bindings;
|
||||
num_textures += num_tex_buffer_bindings;
|
||||
|
||||
const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
|
||||
num_image_buffers[stage] += num_img_buffers_bindings;
|
||||
num_images += num_img_buffers_bindings;
|
||||
|
||||
num_textures += AccumulateCount(info.texture_descriptors);
|
||||
num_images += AccumulateCount(info.image_descriptors);
|
||||
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
|
||||
|
||||
writes_global_memory |= std::ranges::any_of(
|
||||
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
|
||||
}
|
||||
ASSERT(num_textures <= MAX_TEXTURES);
|
||||
ASSERT(num_images <= MAX_IMAGES);
|
||||
|
||||
const bool assembly_shaders{assembly_programs[0].handle != 0};
|
||||
use_storage_buffers =
|
||||
!assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
|
||||
writes_global_memory &= !use_storage_buffers;
|
||||
configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
|
||||
|
||||
if (key.xfb_enabled && device.UseAssemblyShaders()) {
|
||||
GenerateTransformFeedbackState();
|
||||
}
|
||||
const bool in_parallel = thread_worker != nullptr;
|
||||
const auto backend = device.GetShaderBackend();
|
||||
auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
|
||||
shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
|
||||
for (size_t stage = 0; stage < 5; ++stage) {
|
||||
switch (backend) {
|
||||
case Settings::ShaderBackend::GLSL:
|
||||
if (!sources[stage].empty()) {
|
||||
source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
|
||||
}
|
||||
break;
|
||||
case Settings::ShaderBackend::GLASM:
|
||||
if (!sources[stage].empty()) {
|
||||
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
|
||||
if (in_parallel) {
|
||||
// Make sure program is built before continuing when building in parallel
|
||||
glGetString(GL_PROGRAM_ERROR_STRING_NV);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Settings::ShaderBackend::SPIRV:
|
||||
if (!sources_spirv[stage].empty()) {
|
||||
source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
|
||||
// Make sure programs have built if we are building shaders in parallel
|
||||
for (OGLProgram& program : source_programs) {
|
||||
if (program.handle != 0) {
|
||||
GLint status{};
|
||||
glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderComplete();
|
||||
}
|
||||
is_built = true;
|
||||
built_condvar.notify_one();
|
||||
}};
|
||||
if (thread_worker) {
|
||||
thread_worker->QueueWork(std::move(func));
|
||||
} else {
|
||||
func(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Spec>
|
||||
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
|
||||
std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
|
||||
std::array<GLuint, MAX_TEXTURES> samplers;
|
||||
size_t image_view_index{};
|
||||
GLsizei sampler_binding{};
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
|
||||
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
|
||||
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
|
||||
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
|
||||
|
||||
const auto& regs{maxwell3d.regs};
|
||||
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
||||
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||
const Shader::Info& info{stage_infos[stage]};
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
if constexpr (Spec::has_storage_buffers) {
|
||||
size_t ssbo_index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
|
||||
desc.cbuf_offset, desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
}
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(cbufs[desc.cbuf_index].enabled);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
const u32 offset{desc.cbuf_offset + index_offset};
|
||||
const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
|
||||
if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
|
||||
std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
|
||||
if (desc.has_secondary) {
|
||||
ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
|
||||
const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
|
||||
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
|
||||
second_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
|
||||
const u32 raw{lhs_raw | rhs_raw};
|
||||
return TexturePair(raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_view_index++] = handle.first;
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::has_texture_buffers) {
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_view_index++] = handle.first;
|
||||
samplers[sampler_binding++] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if constexpr (Spec::has_image_buffers) {
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
add_image(desc);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_view_index++] = handle.first;
|
||||
|
||||
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
||||
samplers[sampler_binding++] = sampler->Handle();
|
||||
}
|
||||
}
|
||||
if constexpr (Spec::has_images) {
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
add_image(desc);
|
||||
}
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::enabled_stages[0]) {
|
||||
config_stage(0);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[1]) {
|
||||
config_stage(1);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[2]) {
|
||||
config_stage(2);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[3]) {
|
||||
config_stage(3);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[4]) {
|
||||
config_stage(4);
|
||||
}
|
||||
const std::span indices_span(image_view_indices.data(), image_view_index);
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
|
||||
    ImageId* texture_buffer_index{image_view_ids.data()};
    const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
        size_t index{};
        const auto add_buffer{[&](const auto& desc) {
            constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
            for (u32 i = 0; i < desc.count; ++i) {
                bool is_written{false};
                if constexpr (is_image) {
                    is_written = desc.is_written;
                }
                ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
                buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
                                                       image_view.BufferSize(), image_view.format,
                                                       is_written, is_image);
                ++index;
                ++texture_buffer_index;
            }
        }};
        const Shader::Info& info{stage_infos[stage]};
        buffer_cache.UnbindGraphicsTextureBuffers(stage);

        if constexpr (Spec::has_texture_buffers) {
            for (const auto& desc : info.texture_buffer_descriptors) {
                add_buffer(desc);
            }
        }
        if constexpr (Spec::has_image_buffers) {
            for (const auto& desc : info.image_buffer_descriptors) {
                add_buffer(desc);
            }
        }
        for (const auto& desc : info.texture_descriptors) {
            texture_buffer_index += desc.count;
        }
        if constexpr (Spec::has_images) {
            for (const auto& desc : info.image_descriptors) {
                texture_buffer_index += desc.count;
            }
        }
    }};
    if constexpr (Spec::enabled_stages[0]) {
        bind_stage_info(0);
    }
    if constexpr (Spec::enabled_stages[1]) {
        bind_stage_info(1);
    }
    if constexpr (Spec::enabled_stages[2]) {
        bind_stage_info(2);
    }
    if constexpr (Spec::enabled_stages[3]) {
        bind_stage_info(3);
    }
    if constexpr (Spec::enabled_stages[4]) {
        bind_stage_info(4);
    }
    buffer_cache.UpdateGraphicsBuffers(is_indexed);
    buffer_cache.BindHostGeometryBuffers(is_indexed);

    if (!is_built.load(std::memory_order::relaxed)) {
        WaitForBuild();
    }
    if (assembly_programs[0].handle != 0) {
        program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
    } else {
        program_manager.BindSourcePrograms(source_programs);
    }
    const ImageId* views_it{image_view_ids.data()};
    GLsizei texture_binding = 0;
    GLsizei image_binding = 0;
    std::array<GLuint, MAX_TEXTURES> textures;
    std::array<GLuint, MAX_IMAGES> images;
    const auto prepare_stage{[&](size_t stage) {
        buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
        buffer_cache.BindHostStageBuffers(stage);

        texture_binding += num_texture_buffers[stage];
        image_binding += num_image_buffers[stage];

        views_it += num_texture_buffers[stage];
        views_it += num_image_buffers[stage];

        const auto& info{stage_infos[stage]};
        for (const auto& desc : info.texture_descriptors) {
            for (u32 index = 0; index < desc.count; ++index) {
                ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
                textures[texture_binding++] = image_view.Handle(desc.type);
            }
        }
        for (const auto& desc : info.image_descriptors) {
            for (u32 index = 0; index < desc.count; ++index) {
                ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
                if (desc.is_written) {
                    texture_cache.MarkModification(image_view.image_id);
                }
                images[image_binding++] = image_view.StorageView(desc.type, desc.format);
            }
        }
    }};
    if constexpr (Spec::enabled_stages[0]) {
        prepare_stage(0);
    }
    if constexpr (Spec::enabled_stages[1]) {
        prepare_stage(1);
    }
    if constexpr (Spec::enabled_stages[2]) {
        prepare_stage(2);
    }
    if constexpr (Spec::enabled_stages[3]) {
        prepare_stage(3);
    }
    if constexpr (Spec::enabled_stages[4]) {
        prepare_stage(4);
    }
    if (texture_binding != 0) {
        ASSERT(texture_binding == sampler_binding);
        glBindTextures(0, texture_binding, textures.data());
        glBindSamplers(0, sampler_binding, samplers.data());
    }
    if (image_binding != 0) {
        glBindImageTextures(0, image_binding, images.data());
    }
}

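// Uploads the transform feedback layout generated by GenerateTransformFeedbackState() to the
// NV assembly-program path: xfb_attribs holds {attribute, component count, index} triplets
// (XFB_ENTRY_STRIDE entries each), with GL_NEXT_BUFFER_NV triplets marking the switch to the
// next output buffer.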
void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
    glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
                                       xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
}

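// Builds xfb_attribs/xfb_streams from the guest transform feedback layout. As an illustrative
// example, a buffer capturing the four components of the position varying (locations 28-31)
// followed by one generic attribute component would produce the triplets
// {GL_POSITION, 4, 0}, {GL_GENERIC_ATTRIB_NV, 1, n}, and a GL_NEXT_BUFFER_NV row is emitted
// whenever the layout advances to the next output buffer.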
void GraphicsPipeline::GenerateTransformFeedbackState() {
    // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
    // when this is required.
    GLint* cursor{xfb_attribs.data()};
    GLint* current_stream{xfb_streams.data()};

    for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
        const auto& layout = key.xfb_state.layouts[feedback];
        UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
        if (layout.varying_count == 0) {
            continue;
        }
        *current_stream = static_cast<GLint>(feedback);
        if (current_stream != xfb_streams.data()) {
            // When stepping one stream, push the expected token
            cursor[0] = GL_NEXT_BUFFER_NV;
            cursor[1] = 0;
            cursor[2] = 0;
            cursor += XFB_ENTRY_STRIDE;
        }
        ++current_stream;

        const auto& locations = key.xfb_state.varyings[feedback];
        std::optional<u8> current_index;
        for (u32 offset = 0; offset < layout.varying_count; ++offset) {
            const u8 location = locations[offset];
            const u8 index = location / 4;

            if (current_index == index) {
                // Increase number of components of the previous attachment
                ++cursor[-2];
                continue;
            }
            current_index = index;

            std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
            cursor[1] = 1;
            cursor += XFB_ENTRY_STRIDE;
        }
    }
    num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
    num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
}

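// Blocks until the builder signals built_condvar; ConfigureImpl() calls this when is_built is
// still false, i.e. the pipeline was queued for asynchronous compilation and has not finished.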
void GraphicsPipeline::WaitForBuild() {
    std::unique_lock lock{built_mutex};
    built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
}

} // namespace OpenGL

169
src/video_core/renderer_opengl/gl_graphics_pipeline.h
Normal file

@ -0,0 +1,169 @@

// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cstring>
#include <type_traits>
#include <utility>

#include "common/bit_field.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/transform_feedback.h"

namespace OpenGL {

namespace ShaderContext {
struct Context;
}

class Device;
class ProgramManager;

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;

struct GraphicsPipelineKey {
    std::array<u64, 6> unique_hashes;
    union {
        u32 raw;
        BitField<0, 1, u32> xfb_enabled;
        BitField<1, 1, u32> early_z;
        BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
        BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
        BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
        BitField<10, 1, u32> tessellation_clockwise;
    };
    std::array<u32, 3> padding;
    VideoCommon::TransformFeedbackState xfb_state;

    size_t Hash() const noexcept {
        return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
    }

    bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, Size()) == 0;
    }

    bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
        return !operator==(rhs);
    }

    [[nodiscard]] size_t Size() const noexcept {
        if (xfb_enabled != 0) {
            return sizeof(GraphicsPipelineKey);
        } else {
            return offsetof(GraphicsPipelineKey, padding);
        }
    }
};
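// Note: Hash() and operator==() only read the first Size() bytes, so when transform feedback is
// disabled the key effectively ends at `padding` and xfb_state is ignored. Because comparison is
// a raw memcmp, the key must have no hidden padding bits (checked by the asserts below), and the
// shader cache value-initializes its key so the explicit `padding` words stay zero.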
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);

class GraphicsPipeline {
public:
    explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
                              BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                              Tegra::Engines::Maxwell3D& maxwell3d_,
                              ProgramManager& program_manager_, StateTracker& state_tracker_,
                              ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
                              std::array<std::string, 5> sources,
                              std::array<std::vector<u32>, 5> sources_spirv,
                              const std::array<const Shader::Info*, 5>& infos,
                              const GraphicsPipelineKey& key_);

    void Configure(bool is_indexed) {
        configure_func(this, is_indexed);
    }

    void ConfigureTransformFeedback() const {
        if (num_xfb_attribs != 0) {
            ConfigureTransformFeedbackImpl();
        }
    }

    [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
        return key;
    }

    [[nodiscard]] bool WritesGlobalMemory() const noexcept {
        return writes_global_memory;
    }

    [[nodiscard]] bool IsBuilt() const noexcept {
        return is_built.load(std::memory_order::relaxed);
    }

    template <typename Spec>
    static auto MakeConfigureSpecFunc() {
        return [](GraphicsPipeline* pipeline, bool is_indexed) {
            pipeline->ConfigureImpl<Spec>(is_indexed);
        };
    }
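    // MakeConfigureSpecFunc<Spec>() returns a thunk that calls ConfigureImpl<Spec> for a
    // compile-time Spec (which stages are enabled and whether texture/image buffers or images are
    // present); the chosen thunk is stored in configure_func so Configure(is_indexed) dispatches
    // without re-checking the specialization on every draw, e.g. pipeline->Configure(is_indexed)
    // from the rasterizer's Draw().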

private:
    template <typename Spec>
    void ConfigureImpl(bool is_indexed);

    void ConfigureTransformFeedbackImpl() const;

    void GenerateTransformFeedbackState();

    void WaitForBuild();

    TextureCache& texture_cache;
    BufferCache& buffer_cache;
    Tegra::MemoryManager& gpu_memory;
    Tegra::Engines::Maxwell3D& maxwell3d;
    ProgramManager& program_manager;
    StateTracker& state_tracker;
    const GraphicsPipelineKey key;

    void (*configure_func)(GraphicsPipeline*, bool){};

    std::array<OGLProgram, 5> source_programs;
    std::array<OGLAssemblyProgram, 5> assembly_programs;
    u32 enabled_stages_mask{};

    std::array<Shader::Info, 5> stage_infos{};
    std::array<u32, 5> enabled_uniform_buffer_masks{};
    VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
    std::array<u32, 5> base_uniform_bindings{};
    std::array<u32, 5> base_storage_bindings{};
    std::array<u32, 5> num_texture_buffers{};
    std::array<u32, 5> num_image_buffers{};

    bool use_storage_buffers{};
    bool writes_global_memory{};

    static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
    GLsizei num_xfb_attribs{};
    GLsizei num_xfb_strides{};
    std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
    std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};

    std::mutex built_mutex;
    std::condition_variable built_condvar;
    std::atomic_bool is_built{false};
};

} // namespace OpenGL

namespace std {
template <>
struct hash<OpenGL::GraphicsPipelineKey> {
    size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

@ -23,7 +23,6 @@
|
|||
#include "core/memory.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
|
@ -40,7 +39,6 @@ namespace OpenGL {
|
|||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using GLvec4 = std::array<GLfloat, 4>;
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
|
|||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
|
||||
struct TextureHandle {
|
||||
constexpr TextureHandle(u32 data, bool via_header_index) {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
template <typename Engine, typename Entry>
|
||||
TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
|
||||
ShaderType shader_type, size_t index = 0) {
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
const u32 buffer_2 = entry.secondary_buffer;
|
||||
const u32 offset_1 = entry.offset;
|
||||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||
return TextureHandle(handle_1 | handle_2, via_header_index);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return TextureHandle(raw, via_header_index);
|
||||
}
|
||||
const u32 buffer = engine.GetBoundBuffer();
|
||||
const u64 offset = (entry.offset + index) * sizeof(u32);
|
||||
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
|
||||
}
|
||||
|
||||
/// Translates hardware transform feedback indices
|
||||
/// @param location Hardware location
|
||||
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
|
||||
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
|
||||
const u8 index = location / 4;
|
||||
if (index >= 8 && index <= 39) {
|
||||
return {GL_GENERIC_ATTRIB_NV, index - 8};
|
||||
}
|
||||
if (index >= 48 && index <= 55) {
|
||||
return {GL_TEXTURE_COORD_NV, index - 48};
|
||||
}
|
||||
switch (index) {
|
||||
case 7:
|
||||
return {GL_POSITION, 0};
|
||||
case 40:
|
||||
return {GL_PRIMARY_COLOR_NV, 0};
|
||||
case 41:
|
||||
return {GL_SECONDARY_COLOR_NV, 0};
|
||||
case 42:
|
||||
return {GL_BACK_PRIMARY_COLOR_NV, 0};
|
||||
case 43:
|
||||
return {GL_BACK_SECONDARY_COLOR_NV, 0};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("index={}", index);
|
||||
return {GL_POSITION, 0};
|
||||
}
|
||||
|
||||
void oglEnable(GLenum cap, bool state) {
|
||||
(state ? glEnable : glDisable)(cap);
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
|
||||
if (entry.is_buffer) {
|
||||
return ImageViewType::Buffer;
|
||||
}
|
||||
switch (entry.type) {
|
||||
case Tegra::Shader::TextureType::Texture1D:
|
||||
return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
|
||||
case Tegra::Shader::TextureType::Texture2D:
|
||||
return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
|
||||
case Tegra::Shader::TextureType::Texture3D:
|
||||
return ImageViewType::e3D;
|
||||
case Tegra::Shader::TextureType::TextureCube:
|
||||
return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return ImageViewType::e2D;
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
|
||||
switch (entry.type) {
|
||||
case Tegra::Shader::ImageType::Texture1D:
|
||||
return ImageViewType::e1D;
|
||||
case Tegra::Shader::ImageType::Texture1DArray:
|
||||
return ImageViewType::e1DArray;
|
||||
case Tegra::Shader::ImageType::Texture2D:
|
||||
return ImageViewType::e2D;
|
||||
case Tegra::Shader::ImageType::Texture2DArray:
|
||||
return ImageViewType::e2DArray;
|
||||
case Tegra::Shader::ImageType::Texture3D:
|
||||
return ImageViewType::e3D;
|
||||
case Tegra::Shader::ImageType::TextureBuffer:
|
||||
return ImageViewType::Buffer;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return ImageViewType::e2D;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
|
@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
|
||||
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
|
||||
buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
|
||||
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||
async_shaders(emu_window_) {
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
async_shaders.AllocateWorkers();
|
||||
}
|
||||
}
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
|
@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
|
|||
const auto gl_index = static_cast<GLuint>(index);
|
||||
|
||||
// Disable constant attributes.
|
||||
if (attrib.IsConstant()) {
|
||||
if (attrib.constant) {
|
||||
glDisableVertexAttribArray(gl_index);
|
||||
continue;
|
||||
}
|
||||
|
@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
|
||||
u32 clip_distances = 0;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto& shader_config = maxwell3d.regs.shader_config[index];
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
|
||||
// Skip stages that are not enabled
|
||||
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
program_manager.UseGeometryShader(0);
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
program_manager.UseFragmentShader(0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Currently this stages are not supported in the OpenGL backend.
|
||||
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
||||
if (program == Maxwell::ShaderProgram::TesselationControl ||
|
||||
program == Maxwell::ShaderProgram::TesselationEval) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
program_manager.UseVertexShader(program_handle);
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
program_manager.UseGeometryShader(program_handle);
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
program_manager.UseFragmentShader(program_handle);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
break;
|
||||
}
|
||||
|
||||
// Stage indices are 0 - 5
|
||||
const size_t stage = index == 0 ? 0 : index - 1;
|
||||
shaders[stage] = shader;
|
||||
|
||||
SetupDrawTextures(shader, stage);
|
||||
SetupDrawImages(shader, stage);
|
||||
|
||||
buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
|
||||
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : shader->GetEntries().global_memory_entries) {
|
||||
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
|
||||
buffer.cbuf_offset, buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||
// clip distances only when it's written by a shader stage.
|
||||
clip_distances |= shader->GetEntries().clip_distances;
|
||||
|
||||
// When VertexA is enabled, we have dual vertex shaders
|
||||
if (program == Maxwell::ShaderProgram::VertexA) {
|
||||
// VertexB was combined with VertexA, so we skip the VertexB iteration
|
||||
++index;
|
||||
}
|
||||
}
|
||||
SyncClipEnabled(clip_distances);
|
||||
maxwell3d.dirty.flags[Dirty::Shaders] = false;
|
||||
|
||||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader* const shader = shaders[stage];
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
buffer_cache.BindHostStageBuffers(stage);
|
||||
const auto& base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
|
||||
shader_cache.LoadDiskResources(title_id, stop_loading, callback);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
|
@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
|
||||
SyncState();
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
|
||||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
SetupShaders(is_indexed);
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindGraphicsPipeline();
|
||||
pipeline->Configure(is_indexed);
|
||||
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||
BeginTransformFeedback(primitive_mode);
|
||||
BeginTransformFeedback(pipeline, primitive_mode);
|
||||
|
||||
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
|
||||
const GLsizei num_instances =
|
||||
|
@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
num_instances, base_instance);
|
||||
}
|
||||
}
|
||||
|
||||
EndTransformFeedback();
|
||||
|
||||
++num_queued_commands;
|
||||
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||
|
||||
gpu.TickWork();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
BindComputeTextures(kernel);
|
||||
|
||||
const auto& entries = kernel->GetEntries();
|
||||
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : entries.global_memory_entries) {
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
|
||||
buffer.is_written);
|
||||
++ssbo_index;
|
||||
void RasterizerOpenGL::DispatchCompute() {
|
||||
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
|
||||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||
pipeline->Configure();
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
|
||||
++num_queued_commands;
|
||||
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
||||
|
@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
|
||||
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::TiledCacheBarrier() {
|
||||
|
@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
|
|||
return;
|
||||
}
|
||||
num_queued_commands = 0;
|
||||
|
||||
// Make sure memory stored from the previous GL command stream is visible
|
||||
// This is only needed on assembly shaders where we write to GPU memory with raw pointers
|
||||
if (has_written_global_memory) {
|
||||
has_written_global_memory = false;
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
}
|
||||
glFlush();
|
||||
}
|
||||
|
||||
|
@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
|
||||
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
|
||||
|
||||
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
|
||||
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
|
||||
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
SetupComputeTextures(kernel);
|
||||
SetupComputeImages(kernel);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
|
||||
GLuint base_image, size_t& image_view_index,
|
||||
size_t& texture_index, size_t& image_index) {
|
||||
const GLuint* const samplers = sampler_handles.data() + texture_index;
|
||||
const GLuint* const textures = texture_handles.data() + texture_index;
|
||||
const GLuint* const images = image_handles.data() + image_index;
|
||||
|
||||
const size_t num_samplers = entries.samplers.size();
|
||||
for (const auto& sampler : entries.samplers) {
|
||||
for (size_t i = 0; i < sampler.size; ++i) {
|
||||
const ImageViewId image_view_id = image_view_ids[image_view_index++];
|
||||
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
|
||||
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
|
||||
texture_handles[texture_index++] = handle;
|
||||
}
|
||||
}
|
||||
const size_t num_images = entries.images.size();
|
||||
for (size_t unit = 0; unit < num_images; ++unit) {
|
||||
// TODO: Mark as modified
|
||||
const ImageViewId image_view_id = image_view_ids[image_view_index++];
|
||||
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
|
||||
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
|
||||
image_handles[image_index] = handle;
|
||||
++image_index;
|
||||
}
|
||||
if (num_samplers > 0) {
|
||||
glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
|
||||
glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
|
||||
}
|
||||
if (num_images > 0) {
|
||||
glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : shader->GetEntries().samplers) {
|
||||
const auto shader_type = static_cast<ShaderType>(stage_index);
|
||||
for (size_t index = 0; index < entry.size; ++index) {
|
||||
const auto handle =
|
||||
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
|
||||
const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : kernel->GetEntries().samplers) {
|
||||
for (size_t i = 0; i < entry.size; ++i) {
|
||||
const auto handle =
|
||||
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
|
||||
const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
const auto shader_type = static_cast<ShaderType>(stage_index);
|
||||
const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
const auto handle =
|
||||
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncState() {
|
||||
SyncViewport();
|
||||
SyncRasterizeEnable();
|
||||
|
@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
|
|||
|
||||
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
|
||||
if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::ClipDistances] = false;
|
||||
|
@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
|
|||
oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncTransformFeedback() {
|
||||
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
|
||||
// when this is required.
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
static constexpr std::size_t STRIDE = 3;
|
||||
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
|
||||
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
|
||||
|
||||
GLint* cursor = attribs.data();
|
||||
GLint* current_stream = streams.data();
|
||||
|
||||
for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
|
||||
const auto& layout = regs.tfb_layouts[feedback];
|
||||
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
|
||||
if (layout.varying_count == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
*current_stream = static_cast<GLint>(feedback);
|
||||
if (current_stream != streams.data()) {
|
||||
// When stepping one stream, push the expected token
|
||||
cursor[0] = GL_NEXT_BUFFER_NV;
|
||||
cursor[1] = 0;
|
||||
cursor[2] = 0;
|
||||
cursor += STRIDE;
|
||||
}
|
||||
++current_stream;
|
||||
|
||||
const auto& locations = regs.tfb_varying_locs[feedback];
|
||||
std::optional<u8> current_index;
|
||||
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
|
||||
const u8 location = locations[offset];
|
||||
const u8 index = location / 4;
|
||||
|
||||
if (current_index == index) {
|
||||
// Increase number of components of the previous attachment
|
||||
++cursor[-2];
|
||||
continue;
|
||||
}
|
||||
current_index = index;
|
||||
|
||||
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
|
||||
cursor[1] = 1;
|
||||
cursor += STRIDE;
|
||||
}
|
||||
}
|
||||
|
||||
const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
|
||||
const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
|
||||
glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
|
||||
GL_INTERLEAVED_ATTRIBS);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
if (device.UseAssemblyShaders()) {
|
||||
SyncTransformFeedback();
|
||||
}
|
||||
program->ConfigureTransformFeedback();
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||
|
@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::EndTransformFeedback() {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
if (maxwell3d.regs.tfb_enabled != 0) {
|
||||
glEndTransformFeedback();
|
||||
}
|
||||
glEndTransformFeedback();
|
||||
}
|
||||
|
||||
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
|
||||
|
|
|
@ -28,11 +28,9 @@
|
|||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core::Memory {
|
||||
|
@ -81,7 +79,7 @@ public:
|
|||
|
||||
void Draw(bool is_indexed, bool is_instanced) override;
|
||||
void Clear() override;
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
|
@ -118,36 +116,11 @@ public:
|
|||
return num_queued_commands > 0;
|
||||
}
|
||||
|
||||
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
|
||||
return async_shaders;
|
||||
}
|
||||
|
||||
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
|
||||
return async_shaders;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
void BindComputeTextures(Shader* kernel);
|
||||
|
||||
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
|
||||
size_t& image_view_index, size_t& texture_index, size_t& image_index);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(const Shader* kernel);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader* shader);
|
||||
|
||||
/// Syncs state to match guest's
|
||||
void SyncState();
|
||||
|
||||
|
@ -220,18 +193,12 @@ private:
|
|||
/// Syncs vertex instances to match the guest state
|
||||
void SyncVertexInstances();
|
||||
|
||||
/// Syncs transform feedback state to match guest state
|
||||
/// @note Only valid on assembly shaders
|
||||
void SyncTransformFeedback();
|
||||
|
||||
/// Begin a transform feedback
|
||||
void BeginTransformFeedback(GLenum primitive_mode);
|
||||
void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
|
||||
|
||||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
void SetupShaders(bool is_indexed);
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
|
@ -246,13 +213,11 @@ private:
|
|||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
ShaderCache shader_cache;
|
||||
QueryCache query_cache;
|
||||
AccelerateDMA accelerate_dma;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
|
@ -260,7 +225,8 @@ private:
|
|||
std::array<GLuint, MAX_IMAGES> image_handles{};
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Resetted on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
size_t num_queued_commands = 0;
|
||||
bool has_written_global_memory = false;
|
||||
|
||||
u32 last_clip_distance_mask = 0;
|
||||
};
|
||||
|
|
|
@ -83,18 +83,6 @@ void OGLSampler::Release() {
|
|||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLShader::Create(std::string_view source, GLenum type) {
|
||||
if (handle != 0) {
|
||||
return;
|
||||
}
|
||||
if (source.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
handle = GLShader::LoadShader(source, type);
|
||||
}
|
||||
|
||||
void OGLShader::Release() {
|
||||
if (handle == 0)
|
||||
return;
|
||||
|
@ -104,21 +92,6 @@ void OGLShader::Release() {
|
|||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
|
||||
const char* frag_shader, bool separable_program,
|
||||
bool hint_retrievable) {
|
||||
OGLShader vert, geo, frag;
|
||||
if (vert_shader)
|
||||
vert.Create(vert_shader, GL_VERTEX_SHADER);
|
||||
if (geo_shader)
|
||||
geo.Create(geo_shader, GL_GEOMETRY_SHADER);
|
||||
if (frag_shader)
|
||||
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
|
||||
}
|
||||
|
||||
void OGLProgram::Release() {
|
||||
if (handle == 0)
|
||||
return;
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include <utility>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -128,8 +127,6 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
void Create(std::string_view source, GLenum type);
|
||||
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
|
@ -151,17 +148,6 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
void Create(bool separable_program, bool hint_retrievable, T... shaders) {
|
||||
if (handle != 0)
|
||||
return;
|
||||
handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
|
||||
bool separable_program = false, bool hint_retrievable = false);
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,157 +5,93 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <filesystem>
|
||||
#include <stop_token>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/host_translate_info.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
|
||||
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_context.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class AsyncShaders;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class ProgramManager;
|
||||
class RasterizerOpenGL;
|
||||
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ProgramHandle {
|
||||
OGLProgram source_program;
|
||||
OGLAssemblyProgram assembly_program;
|
||||
};
|
||||
using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
|
||||
|
||||
struct PrecompiledShader {
|
||||
ProgramSharedPtr program;
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
ShaderEntries entries;
|
||||
};
|
||||
|
||||
struct ShaderParameters {
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::ConstBufferEngineInterface& engine;
|
||||
ShaderDiskCacheOpenGL& disk_cache;
|
||||
const Device& device;
|
||||
VAddr cpu_addr;
|
||||
const u8* host_ptr;
|
||||
u64 unique_identifier;
|
||||
};
|
||||
|
||||
ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
|
||||
u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
bool hint_retrievable = false);
|
||||
|
||||
class Shader final {
|
||||
class ShaderCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
~Shader();
|
||||
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||
VideoCore::ShaderNotify& shader_notify_);
|
||||
~ShaderCache();
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
GLuint GetHandle() const;
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
bool IsBuilt() const;
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
|
||||
|
||||
/// Gets the shader entries for the shader
|
||||
const ShaderEntries& GetEntries() const {
|
||||
return entries;
|
||||
}
|
||||
|
||||
const VideoCommon::Shader::Registry& GetRegistry() const {
|
||||
return *registry;
|
||||
}
|
||||
|
||||
/// Mark a OpenGL shader as built
|
||||
void AsyncOpenGLBuilt(OGLProgram new_program);
|
||||
|
||||
/// Mark a GLASM shader as built
|
||||
void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
|
||||
|
||||
static std::unique_ptr<Shader> CreateStageFromMemory(
|
||||
const ShaderParameters& params, Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code, ProgramCode program_code_b,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
|
||||
|
||||
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code);
|
||||
|
||||
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader);
|
||||
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
|
||||
|
||||
private:
|
||||
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
|
||||
ProgramSharedPtr program, bool is_built_ = true);
|
||||
GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
|
||||
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
ShaderEntries entries;
|
||||
ProgramSharedPtr program;
|
||||
GLuint handle = 0;
|
||||
bool is_built{};
|
||||
};
|
||||
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
|
||||
|
||||
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
|
||||
public:
|
||||
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
|
||||
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_);
|
||||
~ShaderCacheOpenGL() override;
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
|
||||
|
||||
/// Loads disk cache for the current game
|
||||
void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
|
||||
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
|
||||
std::span<Shader::Environment* const> envs, bool build_in_parallel);
|
||||
|
||||
/// Gets the current specified shader stage program
|
||||
Shader* GetStageProgram(Maxwell::ShaderProgram program,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders);
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
|
||||
const VideoCommon::ShaderInfo* shader);
|
||||
|
||||
/// Gets a compute kernel in the passed address
|
||||
Shader* GetComputeKernel(GPUVAddr code_addr);
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
|
||||
const ComputePipelineKey& key,
|
||||
Shader::Environment& env);
|
||||
|
||||
private:
|
||||
ProgramSharedPtr GeneratePrecompiledProgram(
|
||||
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
|
||||
const std::unordered_set<GLenum>& supported_formats);
|
||||
std::unique_ptr<ShaderWorker> CreateWorkers() const;
|
||||
|
||||
Core::Frontend::EmuWindow& emu_window;
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
const Device& device;
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
VideoCore::ShaderNotify& shader_notify;
|
||||
const bool use_asynchronous_shaders;
|
||||
|
||||
ShaderDiskCacheOpenGL disk_cache;
|
||||
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
||||
GraphicsPipelineKey graphics_key{};
|
||||
GraphicsPipeline* current_pipeline{};
|
||||
|
||||
std::unique_ptr<Shader> null_shader;
|
||||
std::unique_ptr<Shader> null_kernel;
|
||||
ShaderContext::ShaderPools main_pools;
|
||||
std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||
std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||
Shader::Profile profile;
|
||||
Shader::HostTranslateInfo host_info;
|
||||
|
||||
std::filesystem::path shader_cache_filename;
|
||||
std::unique_ptr<ShaderWorker> workers;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
33
src/video_core/renderer_opengl/gl_shader_context.h
Normal file

@ -0,0 +1,33 @@

// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "core/frontend/emu_window.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"

namespace OpenGL::ShaderContext {
struct ShaderPools {
    void ReleaseContents() {
        flow_block.ReleaseContents();
        block.ReleaseContents();
        inst.ReleaseContents();
    }

    Shader::ObjectPool<Shader::IR::Inst> inst;
    Shader::ObjectPool<Shader::IR::Block> block;
    Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
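
// Each shader worker thread owns one Context below: a shared GL context made current through
// `scoped`, plus its own ShaderPools so IR and control-flow allocations can be recycled with
// ReleaseContents() between shader translations rather than reallocated per pipeline.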

struct Context {
    explicit Context(Core::Frontend::EmuWindow& emu_window)
        : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}

    std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
    Core::Frontend::GraphicsContext::Scoped scoped;
    ShaderPools pools;
};

} // namespace OpenGL::ShaderContext

File diff suppressed because it is too large
Load diff
|
@ -1,69 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using ImageEntry = VideoCommon::Shader::ImageEntry;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
|
||||
: ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
|
||||
|
||||
u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 index = 0;
|
||||
};
|
||||
|
||||
struct GlobalMemoryEntry {
|
||||
constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
|
||||
bool is_written_)
|
||||
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
|
||||
is_written_} {}
|
||||
|
||||
u32 cbuf_index = 0;
|
||||
u32 cbuf_offset = 0;
|
||||
bool is_read = false;
|
||||
bool is_written = false;
|
||||
};
|
||||
|
||||
struct ShaderEntries {
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
std::size_t shader_length{};
|
||||
u32 clip_distances{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage);
|
||||
|
||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier,
|
||||
std::string_view suffix = {});
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,482 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/fs/file.h"
|
||||
#include "common/fs/fs.h"
|
||||
#include "common/fs/path_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scm_rev.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/zstd_compression.h"
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
using VideoCommon::Shader::BindlessSamplerMap;
|
||||
using VideoCommon::Shader::BoundSamplerMap;
|
||||
using VideoCommon::Shader::KeyMap;
|
||||
using VideoCommon::Shader::SeparateSamplerKey;
|
||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||
|
||||
struct ConstBufferKey {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
u32 value = 0;
|
||||
};
|
||||
|
||||
struct BoundSamplerEntry {
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct SeparateSamplerEntry {
|
||||
u32 cbuf1 = 0;
|
||||
u32 cbuf2 = 0;
|
||||
u32 offset1 = 0;
|
||||
u32 offset2 = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerEntry {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr u32 NativeVersion = 21;
|
||||
|
||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
ShaderCacheVersionHash hash{};
|
||||
const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
|
||||
std::memcpy(hash.data(), Common::g_shader_cache_version, length);
|
||||
return hash;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
|
||||
|
||||
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
|
||||
|
||||
bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
|
||||
if (!file.ReadObject(type)) {
|
||||
return false;
|
||||
}
|
||||
u32 code_size;
|
||||
u32 code_size_b;
|
||||
if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
|
||||
return false;
|
||||
}
|
||||
code.resize(code_size);
|
||||
code_b.resize(code_size_b);
|
||||
if (file.Read(code) != code_size) {
|
||||
return false;
|
||||
}
|
||||
if (HasProgramA() && file.Read(code_b) != code_size_b) {
|
||||
return false;
|
||||
}
|
||||
|
||||
u8 is_texture_handler_size_known;
|
||||
u32 texture_handler_size_value;
|
||||
u32 num_keys;
|
||||
u32 num_bound_samplers;
|
||||
u32 num_separate_samplers;
|
||||
u32 num_bindless_samplers;
|
||||
if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
|
||||
!file.ReadObject(is_texture_handler_size_known) ||
|
||||
!file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
|
||||
!file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
|
||||
!file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
|
||||
!file.ReadObject(num_bindless_samplers)) {
|
||||
return false;
|
||||
}
|
||||
if (is_texture_handler_size_known) {
|
||||
texture_handler_size = texture_handler_size_value;
|
||||
}
|
||||
|
||||
std::vector<ConstBufferKey> flat_keys(num_keys);
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
|
||||
if (file.Read(flat_keys) != flat_keys.size() ||
|
||||
file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
|
||||
file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
|
||||
file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& entry : flat_keys) {
|
||||
keys.insert({{entry.cbuf, entry.offset}, entry.value});
|
||||
}
|
||||
for (const auto& entry : flat_bound_samplers) {
|
||||
bound_samplers.emplace(entry.offset, entry.sampler);
|
||||
}
|
||||
for (const auto& entry : flat_separate_samplers) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = {entry.cbuf1, entry.cbuf2};
|
||||
key.offsets = {entry.offset1, entry.offset2};
|
||||
separate_samplers.emplace(key, entry.sampler);
|
||||
}
|
||||
for (const auto& entry : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
|
||||
if (!file.WriteObject(static_cast<u32>(type)) ||
|
||||
!file.WriteObject(static_cast<u32>(code.size())) ||
|
||||
!file.WriteObject(static_cast<u32>(code_b.size()))) {
|
||||
return false;
|
||||
}
|
||||
if (file.Write(code) != code.size()) {
|
||||
return false;
|
||||
}
|
||||
if (HasProgramA() && file.Write(code_b) != code_b.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
|
||||
!file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
|
||||
!file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
|
||||
!file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
|
||||
!file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
|
||||
!file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
|
||||
!file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<ConstBufferKey> flat_keys;
|
||||
flat_keys.reserve(keys.size());
|
||||
for (const auto& [address, value] : keys) {
|
||||
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
||||
}
|
||||
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers;
|
||||
flat_bound_samplers.reserve(bound_samplers.size());
|
||||
for (const auto& [address, sampler] : bound_samplers) {
|
||||
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
|
||||
}
|
||||
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers;
|
||||
flat_separate_samplers.reserve(separate_samplers.size());
|
||||
for (const auto& [key, sampler] : separate_samplers) {
|
||||
SeparateSamplerEntry entry;
|
||||
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
|
||||
std::tie(entry.offset1, entry.offset2) = key.offsets;
|
||||
entry.sampler = sampler;
|
||||
flat_separate_samplers.push_back(entry);
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
|
||||
flat_bindless_samplers.reserve(bindless_samplers.size());
|
||||
for (const auto& [address, sampler] : bindless_samplers) {
|
||||
flat_bindless_samplers.push_back(
|
||||
BindlessSamplerEntry{address.first, address.second, sampler});
|
||||
}
|
||||
|
||||
return file.Write(flat_keys) == flat_keys.size() &&
|
||||
file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
|
||||
file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
|
||||
file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
|
||||
}
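Taken together, the Load and Save routines above imply the following transferable-file layout. This is a reading aid reconstructed from the calls shown here, not text carried by the diff, and is only as reliable as those calls:
// Transferable cache layout implied by ShaderDiskCacheEntry::Load/Save:
//   u32 version                          (written once per file when the file is first created)
//   then, repeated until end of file, one entry:
//     u32 type, u32 code_size, u32 code_size_b
//     u64 code[code_size]                (u64 code_b[code_size_b] follows only when both sizes are non-zero)
//     u64 unique_identifier, u32 bound_buffer
//     u8  texture_handler_size_known, u32 texture_handler_size
//     GraphicsInfo graphics_info, ComputeInfo compute_info
//     u32 num_keys, u32 num_bound_samplers, u32 num_separate_samplers, u32 num_bindless_samplers
//     flat ConstBufferKey[num_keys], BoundSamplerEntry[...], SeparateSamplerEntry[...], BindlessSamplerEntry[...]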
|
||||
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
|
||||
|
||||
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
|
||||
|
||||
void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
|
||||
title_id = title_id_;
|
||||
}
|
||||
|
||||
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||
// Skip games without title id
|
||||
const bool has_title_id = title_id != 0;
|
||||
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
if (!file.IsOpen()) {
|
||||
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
|
||||
is_usable = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u32 version{};
|
||||
if (!file.ReadObject(version)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (version < NativeVersion) {
|
||||
LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
|
||||
file.Close();
|
||||
InvalidateTransferable();
|
||||
is_usable = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
if (version > NativeVersion) {
|
||||
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
|
||||
"of the emulator, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Version is valid, load the shaders
|
||||
std::vector<ShaderDiskCacheEntry> entries;
|
||||
while (static_cast<u64>(file.Tell()) < file.GetSize()) {
|
||||
ShaderDiskCacheEntry& entry = entries.emplace_back();
|
||||
if (!entry.Load(file)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
is_usable = true;
|
||||
return {std::move(entries)};
|
||||
}
|
||||
|
||||
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
||||
if (!is_usable) {
|
||||
return {};
|
||||
}
|
||||
|
||||
Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
if (!file.IsOpen()) {
|
||||
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
|
||||
return {};
|
||||
}
|
||||
|
||||
if (const auto result = LoadPrecompiledFile(file)) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
|
||||
file.Close();
|
||||
InvalidatePrecompiled();
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
|
||||
Common::FS::IOFile& file) {
|
||||
// Read compressed file from disk and decompress to virtual precompiled cache file
|
||||
std::vector<u8> compressed(file.GetSize());
|
||||
if (file.Read(compressed) != file.GetSize()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
|
||||
SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
|
||||
ShaderCacheVersionHash file_hash{};
|
||||
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
return std::nullopt;
|
||||
}
|
||||
if (GetShaderCacheVersionHash() != file_hash) {
|
||||
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::vector<ShaderDiskCachePrecompiled> entries;
|
||||
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
|
||||
u32 binary_size;
|
||||
auto& entry = entries.emplace_back();
|
||||
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
|
||||
!LoadObjectFromPrecompiled(entry.binary_format) ||
|
||||
!LoadObjectFromPrecompiled(binary_size)) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
entry.binary.resize(binary_size);
|
||||
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
return entries;
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
|
||||
if (!Common::FS::RemoveFile(GetTransferablePath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
|
||||
Common::FS::PathToUTF8String(GetTransferablePath()));
|
||||
}
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
|
||||
// Clear virtual precompiled cache file
|
||||
precompiled_cache_virtual_file.Resize(0);
|
||||
|
||||
if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
|
||||
Common::FS::PathToUTF8String(GetPrecompiledPath()));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
|
||||
if (!is_usable) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 id = entry.unique_identifier;
|
||||
if (stored_transferable.contains(id)) {
|
||||
// The shader already exists
|
||||
return;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file = AppendTransferableFile();
|
||||
if (!file.IsOpen()) {
|
||||
return;
|
||||
}
|
||||
if (!entry.Save(file)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
|
||||
file.Close();
|
||||
InvalidateTransferable();
|
||||
return;
|
||||
}
|
||||
|
||||
stored_transferable.insert(id);
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
|
||||
if (!is_usable) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
|
||||
// when writing the dump. This should be done the moment I get access to write to the virtual
|
||||
// file.
|
||||
if (precompiled_cache_virtual_file.GetSize() == 0) {
|
||||
SavePrecompiledHeaderToVirtualPrecompiledCache();
|
||||
}
|
||||
|
||||
GLint binary_length;
|
||||
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
|
||||
|
||||
GLenum binary_format;
|
||||
std::vector<u8> binary(binary_length);
|
||||
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
|
||||
|
||||
if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
|
||||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
|
||||
unique_identifier);
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
}
|
||||
|
||||
Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
|
||||
if (!EnsureDirectories()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto transferable_path{GetTransferablePath()};
|
||||
const bool existed = Common::FS::Exists(transferable_path);
|
||||
|
||||
Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
|
||||
Common::FS::PathToUTF8String(transferable_path));
|
||||
return {};
|
||||
}
|
||||
if (!existed || file.GetSize() == 0) {
|
||||
// If the file didn't exist, write its version
|
||||
if (!file.WriteObject(NativeVersion)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
|
||||
Common::FS::PathToUTF8String(transferable_path));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return file;
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
|
||||
const auto hash{GetShaderCacheVersionHash()};
|
||||
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
|
||||
LOG_ERROR(
|
||||
Render_OpenGL,
|
||||
"Failed to write precompiled cache version hash to virtual precompiled cache file");
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
|
||||
const std::vector<u8> compressed =
|
||||
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
|
||||
|
||||
const auto precompiled_path = GetPrecompiledPath();
|
||||
Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
|
||||
Common::FS::PathToUTF8String(precompiled_path));
|
||||
return;
|
||||
}
|
||||
if (file.Write(compressed) != compressed.size()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
|
||||
Common::FS::PathToUTF8String(precompiled_path));
|
||||
}
|
||||
}
|
||||
|
||||
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
|
||||
const auto CreateDir = [](const std::filesystem::path& dir) {
|
||||
if (!Common::FS::CreateDir(dir)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
|
||||
Common::FS::PathToUTF8String(dir));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
|
||||
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
|
||||
CreateDir(GetPrecompiledDir());
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
|
||||
return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
|
||||
return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
|
||||
return GetBaseDir() / "transferable";
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
|
||||
return GetBaseDir() / "precompiled";
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
|
||||
return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
|
||||
return fmt::format("{:016X}", title_id);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@@ -1,176 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/file_sys/vfs_vector.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
|
||||
namespace Common::FS {
|
||||
class IOFile;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using ProgramCode = std::vector<u64>;
|
||||
|
||||
/// Describes a shader and how it's used by the guest GPU
|
||||
struct ShaderDiskCacheEntry {
|
||||
ShaderDiskCacheEntry();
|
||||
~ShaderDiskCacheEntry();
|
||||
|
||||
bool Load(Common::FS::IOFile& file);
|
||||
|
||||
bool Save(Common::FS::IOFile& file) const;
|
||||
|
||||
bool HasProgramA() const {
|
||||
return !code.empty() && !code_b.empty();
|
||||
}
|
||||
|
||||
Tegra::Engines::ShaderType type{};
|
||||
ProgramCode code;
|
||||
ProgramCode code_b;
|
||||
|
||||
u64 unique_identifier = 0;
|
||||
std::optional<u32> texture_handler_size;
|
||||
u32 bound_buffer = 0;
|
||||
VideoCommon::Shader::GraphicsInfo graphics_info;
|
||||
VideoCommon::Shader::ComputeInfo compute_info;
|
||||
VideoCommon::Shader::KeyMap keys;
|
||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||
};
|
||||
|
||||
/// Contains an OpenGL dumped binary program
|
||||
struct ShaderDiskCachePrecompiled {
|
||||
u64 unique_identifier = 0;
|
||||
GLenum binary_format = 0;
|
||||
std::vector<u8> binary;
|
||||
};
|
||||
|
||||
class ShaderDiskCacheOpenGL {
|
||||
public:
|
||||
explicit ShaderDiskCacheOpenGL();
|
||||
~ShaderDiskCacheOpenGL();
|
||||
|
||||
/// Binds a title ID for all future operations.
|
||||
void BindTitleID(u64 title_id);
|
||||
|
||||
/// Loads the transferable cache. If the file has an old version or fails to load, it deletes the file.
|
||||
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
|
||||
|
||||
/// Loads current game's precompiled cache. Invalidates on failure.
|
||||
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
|
||||
|
||||
/// Removes the transferable (and precompiled) cache file.
|
||||
void InvalidateTransferable();
|
||||
|
||||
/// Removes the precompiled cache file and clears virtual precompiled cache file.
|
||||
void InvalidatePrecompiled();
|
||||
|
||||
/// Saves a raw dump to the transferable file. Checks for collisions.
|
||||
void SaveEntry(const ShaderDiskCacheEntry& entry);
|
||||
|
||||
/// Saves a dump entry to the precompiled file. Does not check for collisions.
|
||||
void SavePrecompiled(u64 unique_identifier, GLuint program);
|
||||
|
||||
/// Serializes virtual precompiled shader cache file to real file
|
||||
void SaveVirtualPrecompiledFile();
|
||||
|
||||
private:
|
||||
/// Loads the precompiled cache file. Returns empty on failure.
|
||||
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
|
||||
Common::FS::IOFile& file);
|
||||
|
||||
/// Opens the current game's transferable file and writes its header if it doesn't exist
|
||||
Common::FS::IOFile AppendTransferableFile() const;
|
||||
|
||||
/// Saves the precompiled header to precompiled_cache_virtual_file
|
||||
void SavePrecompiledHeaderToVirtualPrecompiledCache();
|
||||
|
||||
/// Create shader disk cache directories. Returns true on success.
|
||||
bool EnsureDirectories() const;
|
||||
|
||||
/// Gets current game's transferable file path
|
||||
std::filesystem::path GetTransferablePath() const;
|
||||
|
||||
/// Gets current game's precompiled file path
|
||||
std::filesystem::path GetPrecompiledPath() const;
|
||||
|
||||
/// Get user's transferable directory path
|
||||
std::filesystem::path GetTransferableDir() const;
|
||||
|
||||
/// Get user's precompiled directory path
|
||||
std::filesystem::path GetPrecompiledDir() const;
|
||||
|
||||
/// Get user's shader directory path
|
||||
std::filesystem::path GetBaseDir() const;
|
||||
|
||||
/// Get current game's title id
|
||||
std::string GetTitleID() const;
|
||||
|
||||
template <typename T>
|
||||
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
|
||||
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
precompiled_cache_virtual_file_offset += write_length;
|
||||
return write_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
|
||||
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
precompiled_cache_virtual_file_offset += read_length;
|
||||
return read_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SaveObjectToPrecompiled(const T& object) {
|
||||
return SaveArrayToPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
bool SaveObjectToPrecompiled(bool object) {
|
||||
const auto value = static_cast<u8>(object);
|
||||
return SaveArrayToPrecompiled(&value, 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadObjectFromPrecompiled(T& object) {
|
||||
return LoadArrayFromPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
// Stores the whole precompiled cache which will be read from or saved to the precompiled cache
|
||||
// file
|
||||
FileSys::VectorVfsFile precompiled_cache_virtual_file;
|
||||
// Stores the current offset of the precompiled cache file for IO purposes
|
||||
std::size_t precompiled_cache_virtual_file_offset = 0;
|
||||
|
||||
// Stored transferable shaders
|
||||
std::unordered_set<u64> stored_transferable;
|
||||
|
||||
/// Title ID to operate on
|
||||
u64 title_id = 0;
|
||||
|
||||
// The cache has been loaded at boot
|
||||
bool is_usable = false;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
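For context, a minimal usage sketch of the disk cache interface above; the identifiers title_id, entry and gl_program are illustrative placeholders, not taken from the diff:
// Illustrative only: typical lifecycle of the (now removed) ShaderDiskCacheOpenGL.
ShaderDiskCacheOpenGL disk_cache;
disk_cache.BindTitleID(title_id);                                 // associate cache files with the running game
if (const auto entries = disk_cache.LoadTransferable()) {         // raw guest shaders saved on previous runs
    const auto precompiled = disk_cache.LoadPrecompiled();        // host GL program binaries, if still valid
    // ... rebuild each shader in *entries, reusing a matching precompiled binary when available ...
}
disk_cache.SaveEntry(entry);                                      // record a newly seen guest shader
disk_cache.SavePrecompiled(entry.unique_identifier, gl_program);  // dump the linked host program binary
disk_cache.SaveVirtualPrecompiledFile();                          // flush the ZSTD-compressed precompiled file to disk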
|
|
@@ -1,149 +1,3 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
|
||||
if (current == old) {
|
||||
return;
|
||||
}
|
||||
if (current == 0) {
|
||||
if (enabled) {
|
||||
enabled = false;
|
||||
glDisable(stage);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!enabled) {
|
||||
enabled = true;
|
||||
glEnable(stage);
|
||||
}
|
||||
glBindProgramARB(stage, current);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ProgramManager::ProgramManager(const Device& device)
|
||||
: use_assembly_programs{device.UseAssemblyShaders()} {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
} else {
|
||||
graphics_pipeline.Create();
|
||||
glBindProgramPipeline(graphics_pipeline.handle);
|
||||
}
|
||||
}
|
||||
|
||||
ProgramManager::~ProgramManager() = default;
|
||||
|
||||
void ProgramManager::BindCompute(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
|
||||
} else {
|
||||
is_graphics_bound = false;
|
||||
glUseProgram(program);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindGraphicsPipeline() {
|
||||
if (!use_assembly_programs) {
|
||||
UpdateSourcePrograms();
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindHostPipeline(GLuint pipeline) {
|
||||
if (use_assembly_programs) {
|
||||
if (geometry_enabled) {
|
||||
geometry_enabled = false;
|
||||
old_state.geometry = 0;
|
||||
glDisable(GL_GEOMETRY_PROGRAM_NV);
|
||||
}
|
||||
} else {
|
||||
if (!is_graphics_bound) {
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
glBindProgramPipeline(pipeline);
|
||||
}
|
||||
|
||||
void ProgramManager::RestoreGuestPipeline() {
|
||||
if (use_assembly_programs) {
|
||||
glBindProgramPipeline(0);
|
||||
} else {
|
||||
glBindProgramPipeline(graphics_pipeline.handle);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindHostCompute(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
glDisable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
glUseProgram(program);
|
||||
is_graphics_bound = false;
|
||||
}
|
||||
|
||||
void ProgramManager::RestoreGuestCompute() {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::UseVertexShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
|
||||
}
|
||||
current_state.vertex = program;
|
||||
}
|
||||
|
||||
void ProgramManager::UseGeometryShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
|
||||
}
|
||||
current_state.geometry = program;
|
||||
}
|
||||
|
||||
void ProgramManager::UseFragmentShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
|
||||
}
|
||||
current_state.fragment = program;
|
||||
}
|
||||
|
||||
void ProgramManager::UpdateSourcePrograms() {
|
||||
if (!is_graphics_bound) {
|
||||
is_graphics_bound = true;
|
||||
glUseProgram(0);
|
||||
}
|
||||
|
||||
const GLuint handle = graphics_pipeline.handle;
|
||||
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
|
||||
if (current == old) {
|
||||
return;
|
||||
}
|
||||
glUseProgramStages(handle, stage, current);
|
||||
};
|
||||
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
|
||||
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
|
||||
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
|
||||
|
||||
old_state = current_state;
|
||||
}
|
||||
|
||||
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
|
||||
const auto& regs = maxwell.regs;
|
||||
|
||||
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
|
||||
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@@ -4,79 +4,142 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <array>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
|
||||
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
/// Not following that rule will cause problems on some AMD drivers.
|
||||
struct alignas(16) MaxwellUniformData {
|
||||
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
|
||||
|
||||
GLfloat y_direction;
|
||||
};
|
||||
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
|
||||
static_assert(sizeof(MaxwellUniformData) < 16384,
|
||||
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
class ProgramManager {
|
||||
public:
|
||||
explicit ProgramManager(const Device& device);
|
||||
~ProgramManager();
|
||||
static constexpr size_t NUM_STAGES = 5;
|
||||
|
||||
/// Binds a compute program
|
||||
void BindCompute(GLuint program);
|
||||
|
||||
/// Updates bound programs.
|
||||
void BindGraphicsPipeline();
|
||||
|
||||
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
|
||||
void BindHostPipeline(GLuint pipeline);
|
||||
|
||||
/// Rewinds BindHostPipeline state changes.
|
||||
void RestoreGuestPipeline();
|
||||
|
||||
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
|
||||
void BindHostCompute(GLuint program);
|
||||
|
||||
/// Rewinds BindHostCompute state changes.
|
||||
void RestoreGuestCompute();
|
||||
|
||||
void UseVertexShader(GLuint program);
|
||||
void UseGeometryShader(GLuint program);
|
||||
void UseFragmentShader(GLuint program);
|
||||
|
||||
private:
|
||||
struct PipelineState {
|
||||
GLuint vertex = 0;
|
||||
GLuint geometry = 0;
|
||||
GLuint fragment = 0;
|
||||
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
|
||||
/// Update GLSL programs.
|
||||
void UpdateSourcePrograms();
|
||||
public:
|
||||
explicit ProgramManager(const Device& device) {
|
||||
glCreateProgramPipelines(1, &pipeline.handle);
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
}
|
||||
|
||||
OGLPipeline graphics_pipeline;
|
||||
void BindComputeProgram(GLuint program) {
|
||||
glUseProgram(program);
|
||||
is_compute_bound = true;
|
||||
}
|
||||
|
||||
PipelineState current_state;
|
||||
PipelineState old_state;
|
||||
void BindComputeAssemblyProgram(GLuint program) {
|
||||
if (current_assembly_compute_program != program) {
|
||||
current_assembly_compute_program = program;
|
||||
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
|
||||
}
|
||||
UnbindPipeline();
|
||||
}
|
||||
|
||||
bool use_assembly_programs = false;
|
||||
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
|
||||
static constexpr std::array<GLenum, 5> stage_enums{
|
||||
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
|
||||
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
|
||||
};
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (current_programs[stage] != programs[stage].handle) {
|
||||
current_programs[stage] = programs[stage].handle;
|
||||
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
|
||||
}
|
||||
}
|
||||
BindPipeline();
|
||||
}
|
||||
|
||||
bool is_graphics_bound = true;
|
||||
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
|
||||
if (current_programs[0] != vertex) {
|
||||
current_programs[0] = vertex;
|
||||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
|
||||
}
|
||||
if (current_programs[4] != fragment) {
|
||||
current_programs[4] = fragment;
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
|
||||
}
|
||||
glUseProgramStages(
|
||||
pipeline.handle,
|
||||
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
|
||||
current_programs[1] = 0;
|
||||
current_programs[2] = 0;
|
||||
current_programs[3] = 0;
|
||||
|
||||
bool vertex_enabled = false;
|
||||
bool geometry_enabled = false;
|
||||
bool fragment_enabled = false;
|
||||
if (current_stage_mask != 0) {
|
||||
current_stage_mask = 0;
|
||||
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
|
||||
glDisable(program_type);
|
||||
}
|
||||
}
|
||||
BindPipeline();
|
||||
}
|
||||
|
||||
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
|
||||
u32 stage_mask) {
|
||||
const u32 changed_mask = current_stage_mask ^ stage_mask;
|
||||
current_stage_mask = stage_mask;
|
||||
|
||||
if (changed_mask != 0) {
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (((changed_mask >> stage) & 1) != 0) {
|
||||
if (((stage_mask >> stage) & 1) != 0) {
|
||||
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
|
||||
} else {
|
||||
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (current_programs[stage] != programs[stage].handle) {
|
||||
current_programs[stage] = programs[stage].handle;
|
||||
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
|
||||
}
|
||||
}
|
||||
UnbindPipeline();
|
||||
}
|
||||
|
||||
void RestoreGuestCompute() {}
|
||||
|
||||
private:
|
||||
void BindPipeline() {
|
||||
if (!is_pipeline_bound) {
|
||||
is_pipeline_bound = true;
|
||||
glBindProgramPipeline(pipeline.handle);
|
||||
}
|
||||
UnbindCompute();
|
||||
}
|
||||
|
||||
void UnbindPipeline() {
|
||||
if (is_pipeline_bound) {
|
||||
is_pipeline_bound = false;
|
||||
glBindProgramPipeline(0);
|
||||
}
|
||||
UnbindCompute();
|
||||
}
|
||||
|
||||
void UnbindCompute() {
|
||||
if (is_compute_bound) {
|
||||
is_compute_bound = false;
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
|
||||
OGLPipeline pipeline;
|
||||
bool is_pipeline_bound{};
|
||||
bool is_compute_bound{};
|
||||
|
||||
u32 current_stage_mask = 0;
|
||||
std::array<GLuint, NUM_STAGES> current_programs{};
|
||||
GLuint current_assembly_compute_program = 0;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
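As a rough sketch of how the reworked interface above is meant to be driven; the names device, glsl_stages, asm_stages, stage_mask and compute are assumptions for illustration, not part of the diff:
// Illustrative only: binding host shader stages with the reworked ProgramManager.
ProgramManager program_manager{device};
if (device.UseAssemblyShaders()) {
    // stage_mask carries one bit per stage: vertex, tess control, tess eval, geometry, fragment.
    program_manager.BindAssemblyPrograms(asm_stages, stage_mask);
    program_manager.BindComputeAssemblyProgram(compute.handle);
} else {
    program_manager.BindSourcePrograms(glsl_stages); // std::span<const OGLProgram, 5>
    program_manager.BindComputeProgram(compute.handle);
}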
|
||||
|
|
|
@@ -5,57 +5,108 @@
|
|||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
|
||||
namespace OpenGL::GLShader {
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
std::string_view StageDebugName(GLenum type) {
|
||||
switch (type) {
|
||||
case GL_VERTEX_SHADER:
|
||||
return "vertex";
|
||||
case GL_GEOMETRY_SHADER:
|
||||
return "geometry";
|
||||
case GL_FRAGMENT_SHADER:
|
||||
return "fragment";
|
||||
case GL_COMPUTE_SHADER:
|
||||
return "compute";
|
||||
static OGLProgram LinkSeparableProgram(GLuint shader) {
|
||||
OGLProgram program;
|
||||
program.handle = glCreateProgram();
|
||||
glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
|
||||
glAttachShader(program.handle, shader);
|
||||
glLinkProgram(program.handle);
|
||||
if (!Settings::values.renderer_debug) {
|
||||
return program;
|
||||
}
|
||||
UNIMPLEMENTED();
|
||||
return "unknown";
|
||||
GLint link_status{};
|
||||
glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
|
||||
|
||||
GLint log_length{};
|
||||
glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
|
||||
if (log_length == 0) {
|
||||
return program;
|
||||
}
|
||||
std::string log(log_length, 0);
|
||||
glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
|
||||
if (link_status == GL_FALSE) {
|
||||
LOG_ERROR(Render_OpenGL, "{}", log);
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "{}", log);
|
||||
}
|
||||
return program;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
static void LogShader(GLuint shader, std::string_view code = {}) {
|
||||
GLint shader_status{};
|
||||
glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
|
||||
if (shader_status == GL_FALSE) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to build shader");
|
||||
}
|
||||
GLint log_length{};
|
||||
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
|
||||
if (log_length == 0) {
|
||||
return;
|
||||
}
|
||||
std::string log(log_length, 0);
|
||||
glGetShaderInfoLog(shader, log_length, nullptr, log.data());
|
||||
if (shader_status == GL_FALSE) {
|
||||
LOG_ERROR(Render_OpenGL, "{}", log);
|
||||
if (!code.empty()) {
|
||||
LOG_INFO(Render_OpenGL, "\n{}", code);
|
||||
}
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "{}", log);
|
||||
}
|
||||
}
|
||||
|
||||
GLuint LoadShader(std::string_view source, GLenum type) {
|
||||
const std::string_view debug_type = StageDebugName(type);
|
||||
const GLuint shader_id = glCreateShader(type);
|
||||
OGLProgram CreateProgram(std::string_view code, GLenum stage) {
|
||||
OGLShader shader;
|
||||
shader.handle = glCreateShader(stage);
|
||||
|
||||
const GLchar* source_string = source.data();
|
||||
const GLint source_length = static_cast<GLint>(source.size());
|
||||
const GLint length = static_cast<GLint>(code.size());
|
||||
const GLchar* const code_ptr = code.data();
|
||||
glShaderSource(shader.handle, 1, &code_ptr, &length);
|
||||
glCompileShader(shader.handle);
|
||||
if (Settings::values.renderer_debug) {
|
||||
LogShader(shader.handle, code);
|
||||
}
|
||||
return LinkSeparableProgram(shader.handle);
|
||||
}
|
||||
|
||||
glShaderSource(shader_id, 1, &source_string, &source_length);
|
||||
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
|
||||
glCompileShader(shader_id);
|
||||
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
|
||||
OGLShader shader;
|
||||
shader.handle = glCreateShader(stage);
|
||||
|
||||
GLint result = GL_FALSE;
|
||||
GLint info_log_length;
|
||||
glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
|
||||
glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
|
||||
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
|
||||
static_cast<GLsizei>(code.size_bytes()));
|
||||
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
|
||||
if (Settings::values.renderer_debug) {
|
||||
LogShader(shader.handle);
|
||||
}
|
||||
return LinkSeparableProgram(shader.handle);
|
||||
}
|
||||
|
||||
if (info_log_length > 1) {
|
||||
std::string shader_error(info_log_length, ' ');
|
||||
glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
|
||||
if (result == GL_TRUE) {
|
||||
LOG_DEBUG(Render_OpenGL, "{}", shader_error);
|
||||
} else {
|
||||
LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
|
||||
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
|
||||
OGLAssemblyProgram program;
|
||||
glGenProgramsARB(1, &program.handle);
|
||||
glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
|
||||
static_cast<GLsizei>(code.size()), code.data());
|
||||
if (Settings::values.renderer_debug) {
|
||||
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
|
||||
if (err && *err) {
|
||||
if (std::strstr(err, "error")) {
|
||||
LOG_CRITICAL(Render_OpenGL, "\n{}", err);
|
||||
LOG_INFO(Render_OpenGL, "\n{}", code);
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "\n{}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
return shader_id;
|
||||
return program;
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
} // namespace OpenGL
|
||||
|
|
|
@@ -4,92 +4,23 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL::GLShader {
|
||||
namespace OpenGL {
|
||||
|
||||
/**
|
||||
* Utility function to log the source code of a list of shaders.
|
||||
* @param shaders The OpenGL shaders whose source we will print.
|
||||
*/
|
||||
template <typename... T>
|
||||
void LogShaderSource(T... shaders) {
|
||||
auto shader_list = {shaders...};
|
||||
OGLProgram CreateProgram(std::string_view code, GLenum stage);
|
||||
|
||||
for (const auto& shader : shader_list) {
|
||||
if (shader == 0)
|
||||
continue;
|
||||
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
|
||||
|
||||
GLint source_length;
|
||||
glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
|
||||
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
|
||||
|
||||
std::string source(source_length, ' ');
|
||||
glGetShaderSource(shader, source_length, nullptr, &source[0]);
|
||||
LOG_INFO(Render_OpenGL, "Shader source {}", source);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function to create and compile an OpenGL GLSL shader
|
||||
* @param source String of the GLSL shader program
|
||||
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
|
||||
*/
|
||||
GLuint LoadShader(std::string_view source, GLenum type);
|
||||
|
||||
/**
|
||||
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
|
||||
* @param separable_program whether to create a separable program
|
||||
* @param shaders ID of shaders to attach to the program
|
||||
* @returns Handle of the newly created OpenGL program object
|
||||
*/
|
||||
template <typename... T>
|
||||
GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
|
||||
// Link the program
|
||||
LOG_DEBUG(Render_OpenGL, "Linking program...");
|
||||
|
||||
GLuint program_id = glCreateProgram();
|
||||
|
||||
((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
|
||||
|
||||
if (separable_program) {
|
||||
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
|
||||
}
|
||||
if (hint_retrievable) {
|
||||
glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
|
||||
}
|
||||
|
||||
glLinkProgram(program_id);
|
||||
|
||||
// Check the program
|
||||
GLint result = GL_FALSE;
|
||||
GLint info_log_length;
|
||||
glGetProgramiv(program_id, GL_LINK_STATUS, &result);
|
||||
glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
|
||||
|
||||
if (info_log_length > 1) {
|
||||
std::string program_error(info_log_length, ' ');
|
||||
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
|
||||
if (result == GL_TRUE) {
|
||||
LOG_DEBUG(Render_OpenGL, "{}", program_error);
|
||||
} else {
|
||||
LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
|
||||
}
|
||||
}
|
||||
|
||||
if (result == GL_FALSE) {
|
||||
// There was a problem linking the shader, print the source for debugging purposes.
|
||||
LogShaderSource(shaders...);
|
||||
}
|
||||
|
||||
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
|
||||
|
||||
((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
|
||||
|
||||
return program_id;
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
} // namespace OpenGL
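A brief sketch of the helpers declared above; the inputs frag_glsl, frag_spirv and frag_asm are placeholders for a GLSL source string, a SPIR-V word vector and an NV assembly listing:
// Illustrative only: building host shader programs with the new helpers.
const OGLProgram glsl_program = CreateProgram(frag_glsl, GL_FRAGMENT_SHADER);
const OGLProgram spirv_program = CreateProgram(std::span<const u32>(frag_spirv), GL_FRAGMENT_SHADER);
const OGLAssemblyProgram asm_program = CompileProgram(frag_asm, GL_FRAGMENT_PROGRAM_NV);
Note that, per the implementation above, compile and link logs are only collected when Settings::values.renderer_debug is enabled.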
|
||||
|
|
|
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}

void SetupDirtyShaders(Tables& tables) {
FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
Shaders);
}

void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
@@ -52,7 +52,6 @@ enum : u8 {
BlendState0,
BlendState7 = BlendState0 + 7,

Shaders,
ClipDistances,

PolygonModes,
@@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"

namespace OpenGL {

namespace {

using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
|
|||
GLsizei depth;
|
||||
};
|
||||
|
||||
struct FormatTuple {
|
||||
GLenum internal_format;
|
||||
GLenum format = GL_NONE;
|
||||
GLenum type = GL_NONE;
|
||||
};
|
||||
|
||||
constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
|
||||
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
|
||||
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
|
||||
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
|
||||
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
|
||||
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
|
||||
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
|
||||
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
|
||||
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
|
||||
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
|
||||
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
|
||||
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
|
||||
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
|
||||
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
|
||||
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
|
||||
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
|
||||
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
|
||||
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
|
||||
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
|
||||
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
|
||||
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
|
||||
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
|
||||
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
|
||||
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
|
||||
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
|
||||
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
|
||||
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
|
||||
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
|
||||
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
|
||||
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
|
||||
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
|
||||
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
|
||||
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
|
||||
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
|
||||
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
|
||||
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
|
||||
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
|
||||
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
|
||||
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
|
||||
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
|
||||
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
|
||||
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
|
||||
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
|
||||
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
|
||||
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
|
||||
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
|
||||
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
|
||||
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
|
||||
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
|
||||
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
|
||||
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
|
||||
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
|
||||
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
|
||||
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
|
||||
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
|
||||
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
|
||||
}};
|
||||
|
||||
constexpr std::array ACCELERATED_FORMATS{
|
||||
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
|
||||
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
|
||||
|
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
|
|||
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
|
||||
};
|
||||
|
||||
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
|
||||
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
|
||||
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
|
||||
}
|
||||
|
||||
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
|
||||
switch (info.type) {
|
||||
case ImageType::e1D:
|
||||
|
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
|
|||
return GL_NONE;
|
||||
}
|
||||
|
||||
GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
|
||||
GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
|
||||
const bool is_multisampled = num_samples > 1;
|
||||
switch (type) {
|
||||
case ImageViewType::e1D:
|
||||
case Shader::TextureType::Color1D:
|
||||
return GL_TEXTURE_1D;
|
||||
case ImageViewType::e2D:
|
||||
case Shader::TextureType::Color2D:
|
||||
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
|
||||
case ImageViewType::Cube:
|
||||
case Shader::TextureType::ColorCube:
|
||||
return GL_TEXTURE_CUBE_MAP;
|
||||
case ImageViewType::e3D:
|
||||
case Shader::TextureType::Color3D:
|
||||
return GL_TEXTURE_3D;
|
||||
case ImageViewType::e1DArray:
|
||||
case Shader::TextureType::ColorArray1D:
|
||||
return GL_TEXTURE_1D_ARRAY;
|
||||
case ImageViewType::e2DArray:
|
||||
case Shader::TextureType::ColorArray2D:
|
||||
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
|
||||
case ImageViewType::CubeArray:
|
||||
case Shader::TextureType::ColorArrayCube:
|
||||
return GL_TEXTURE_CUBE_MAP_ARRAY;
|
||||
case ImageViewType::Rect:
|
||||
return GL_TEXTURE_RECTANGLE;
|
||||
case ImageViewType::Buffer:
|
||||
case Shader::TextureType::Buffer:
|
||||
return GL_TEXTURE_BUFFER;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid image view type={}", type);
|
||||
|
@@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
|
|||
default:
|
||||
return false;
|
||||
}
|
||||
const GLenum internal_format = GetFormatTuple(info.format).internal_format;
|
||||
const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
|
||||
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
|
||||
if (format_info.is_compressed) {
|
||||
return false;
|
||||
|
@@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
|
|||
|
||||
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
||||
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
|
||||
const GLuint texture = image_view->DefaultHandle();
|
||||
glNamedFramebufferTexture(fbo, attachment, texture, 0);
|
||||
glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
|
||||
return;
|
||||
}
|
||||
const GLuint texture = image_view->Handle(ImageViewType::e3D);
|
||||
const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
|
||||
if (image_view->range.extent.layers > 1) {
|
||||
// TODO: OpenGL doesn't support rendering to a fixed number of slices
|
||||
glNamedFramebufferTexture(fbo, attachment, texture, 0);
|
||||
|
@@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
|||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
|
||||
switch (format) {
|
||||
case Shader::ImageFormat::Typeless:
|
||||
break;
|
||||
case Shader::ImageFormat::R8_SINT:
|
||||
return GL_R8I;
|
||||
case Shader::ImageFormat::R8_UINT:
|
||||
return GL_R8UI;
|
||||
case Shader::ImageFormat::R16_UINT:
|
||||
return GL_R16UI;
|
||||
case Shader::ImageFormat::R16_SINT:
|
||||
return GL_R16I;
|
||||
case Shader::ImageFormat::R32_UINT:
|
||||
return GL_R32UI;
|
||||
case Shader::ImageFormat::R32G32_UINT:
|
||||
return GL_RG32UI;
|
||||
case Shader::ImageFormat::R32G32B32A32_UINT:
|
||||
return GL_RGBA32UI;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid image format={}", format);
|
||||
return GL_R32UI;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
ImageBufferMap::~ImageBufferMap() {
|
||||
|
@@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
|
|||
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
|
||||
for (size_t i = 0; i < TARGETS.size(); ++i) {
|
||||
const GLenum target = TARGETS[i];
|
||||
for (const FormatTuple& tuple : FORMAT_TABLE) {
|
||||
for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
|
||||
const GLenum format = tuple.internal_format;
|
||||
GLint compat_class;
|
||||
GLint compat_type;
|
||||
|
@@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
|
|||
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
|
||||
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
|
||||
null_image_3d.Create(GL_TEXTURE_3D);
|
||||
null_image_rect.Create(GL_TEXTURE_RECTANGLE);
|
||||
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
|
||||
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
|
||||
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
|
||||
glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
|
||||
|
||||
std::array<GLuint, 4> new_handles;
|
||||
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
|
||||
|
@@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
    glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
                  GL_R8, 0, 1, 0, 6);
    const std::array texture_handles{
        null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
        null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
        null_image_view_2d_array.handle, null_image_view_cube.handle,
        null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
        null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
        null_image_view_cube.handle,
    };
    for (const GLuint handle : texture_handles) {
        static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
        glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
    }
    const auto set_view = [this](ImageViewType type, GLuint handle) {
    const auto set_view = [this](Shader::TextureType type, GLuint handle) {
        if (device.HasDebuggingToolAttached()) {
            const std::string name = fmt::format("NullImage {}", type);
            glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
        }
        null_image_views[static_cast<size_t>(type)] = handle;
    };
    set_view(ImageViewType::e1D, null_image_view_1d.handle);
    set_view(ImageViewType::e2D, null_image_view_2d.handle);
    set_view(ImageViewType::Cube, null_image_view_cube.handle);
    set_view(ImageViewType::e3D, null_image_3d.handle);
    set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
    set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
    set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
    set_view(ImageViewType::Rect, null_image_rect.handle);
    set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
    set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
    set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
    set_view(Shader::TextureType::Color3D, null_image_3d.handle);
    set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
    set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
    set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}

TextureCacheRuntime::~TextureCacheRuntime() = default;
@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
        gl_format = GL_RGBA;
        gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
    } else {
        const auto& tuple = GetFormatTuple(info.format);
        const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
        gl_internal_format = tuple.internal_format;
        gl_format = tuple.format;
        gl_type = tuple.type;
@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
        break;
    case GL_TEXTURE_BUFFER:
        buffer.Create();
        glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
        UNREACHABLE();
        break;
    default:
        UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
    }
}

void Image::UploadMemory(const ImageBufferMap& map,
                         std::span<const VideoCommon::BufferCopy> copies) {
    for (const VideoCommon::BufferCopy& copy : copies) {
        glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
                                 copy.dst_offset, copy.size);
    }
}

void Image::DownloadMemory(ImageBufferMap& map,
                           std::span<const VideoCommon::BufferImageCopy> copies) {
    glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
    if (True(image.flags & ImageFlagBits::Converted)) {
        internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
    } else {
        internal_format = GetFormatTuple(format).internal_format;
        internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
    }
    full_range = info.range;
    flat_range = info.range;
    set_object_label = device.HasDebuggingToolAttached();
    is_render_target = info.IsRenderTarget();
    original_texture = image.texture.handle;
    num_samples = image.info.num_samples;
    if (!is_render_target) {
        swizzle[0] = info.x_source;
        swizzle[1] = info.y_source;
        swizzle[2] = info.z_source;
        swizzle[3] = info.w_source;
    }
    VideoCommon::SubresourceRange flatten_range = info.range;
    std::array<GLuint, 2> handles;
    stored_views.reserve(2);

    switch (info.type) {
    case ImageViewType::e1DArray:
        flatten_range.extent.layers = 1;
        flat_range.extent.layers = 1;
        [[fallthrough]];
    case ImageViewType::e1D:
        glGenTextures(2, handles.data());
        SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
        SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
        SetupView(Shader::TextureType::Color1D);
        SetupView(Shader::TextureType::ColorArray1D);
        break;
    case ImageViewType::e2DArray:
        flatten_range.extent.layers = 1;
        flat_range.extent.layers = 1;
        [[fallthrough]];
    case ImageViewType::e2D:
        if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
                .base = {.level = info.range.base.level, .layer = 0},
                .extent = {.levels = 1, .layers = 1},
            };
            glGenTextures(1, handles.data());
            SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
            break;
            full_range = slice_range;

            SetupView(Shader::TextureType::Color3D);
        } else {
            SetupView(Shader::TextureType::Color2D);
            SetupView(Shader::TextureType::ColorArray2D);
        }
        glGenTextures(2, handles.data());
        SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
        SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
        break;
    case ImageViewType::e3D:
        glGenTextures(1, handles.data());
        SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
        SetupView(Shader::TextureType::Color3D);
        break;
    case ImageViewType::CubeArray:
        flatten_range.extent.layers = 6;
        flat_range.extent.layers = 6;
        [[fallthrough]];
    case ImageViewType::Cube:
        glGenTextures(2, handles.data());
        SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
        SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
        SetupView(Shader::TextureType::ColorCube);
        SetupView(Shader::TextureType::ColorArrayCube);
        break;
    case ImageViewType::Rect:
        glGenTextures(1, handles.data());
        SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
        UNIMPLEMENTED();
        break;
    case ImageViewType::Buffer:
        glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
        SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
        UNREACHABLE();
        break;
    }
    switch (info.type) {
    case ImageViewType::e1D:
        default_handle = Handle(Shader::TextureType::Color1D);
        break;
    case ImageViewType::e1DArray:
        default_handle = Handle(Shader::TextureType::ColorArray1D);
        break;
    case ImageViewType::e2D:
        default_handle = Handle(Shader::TextureType::Color2D);
        break;
    case ImageViewType::e2DArray:
        default_handle = Handle(Shader::TextureType::ColorArray2D);
        break;
    case ImageViewType::e3D:
        default_handle = Handle(Shader::TextureType::Color3D);
        break;
    case ImageViewType::Cube:
        default_handle = Handle(Shader::TextureType::ColorCube);
        break;
    case ImageViewType::CubeArray:
        default_handle = Handle(Shader::TextureType::ColorArrayCube);
        break;
    default:
        break;
    }
    default_handle = Handle(info.type);
}

ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                     const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
    : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
      buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}

ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                     const VideoCommon::ImageViewInfo& view_info)
    : VideoCommon::ImageViewBase{info, view_info} {}

ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
    : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}

void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
                          GLuint handle, const VideoCommon::ImageViewInfo& info,
                          VideoCommon::SubresourceRange view_range) {
    if (info.type == ImageViewType::Buffer) {
        // TODO: Take offset from buffer cache
        glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
                             image.guest_size_bytes);
    } else {
        const GLuint parent = image.texture.handle;
        const GLenum target = ImageTarget(view_type, image.info.num_samples);
        glTextureView(handle, target, parent, internal_format, view_range.base.level,
                      view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
        if (!info.IsRenderTarget()) {
            ApplySwizzle(handle, format, info.Swizzle());
        }
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
    if (image_format == Shader::ImageFormat::Typeless) {
        return Handle(texture_type);
    }
    if (device.HasDebuggingToolAttached()) {
        const std::string name = VideoCommon::Name(*this, view_type);
        glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
    const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
                         image_format == Shader::ImageFormat::R16_SINT};
    if (!storage_views) {
        storage_views = std::make_unique<StorageViews>();
    }
    stored_views.emplace_back().handle = handle;
    views[static_cast<size_t>(view_type)] = handle;
    auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
    GLuint& view{type_views[static_cast<size_t>(texture_type)]};
    if (view == 0) {
        view = MakeView(texture_type, ShaderFormat(image_format));
    }
    return view;
}

void ImageView::SetupView(Shader::TextureType view_type) {
    views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
}

GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
    VideoCommon::SubresourceRange view_range;
    switch (view_type) {
    case Shader::TextureType::Color1D:
    case Shader::TextureType::Color2D:
    case Shader::TextureType::ColorCube:
        view_range = flat_range;
        break;
    case Shader::TextureType::ColorArray1D:
    case Shader::TextureType::ColorArray2D:
    case Shader::TextureType::Color3D:
    case Shader::TextureType::ColorArrayCube:
        view_range = full_range;
        break;
    default:
        UNREACHABLE();
    }
    OGLTextureView& view = stored_views.emplace_back();
    view.Create();

    const GLenum target = ImageTarget(view_type, num_samples);
    glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
                  view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
    if (!is_render_target) {
        std::array<SwizzleSource, 4> casted_swizzle;
        std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
            return static_cast<SwizzleSource>(component_swizzle);
        });
        ApplySwizzle(view.handle, format, casted_swizzle);
    }
    if (set_object_label) {
        const std::string name = VideoCommon::Name(*this);
        glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
    }
    return view.handle;
}

Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
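Editor's note (not part of the diff): the texture cache now indexes its view tables by Shader::TextureType instead of ImageViewType, and storage images get reinterpreted views through ImageView::StorageView. A minimal caller-side sketch using only the accessors shown above; the helper name PickImageViewHandle is hypothetical and only illustrates the lookup order:

// Hypothetical helper, for illustration only. Assumes the gl_texture_cache.h
// declarations from this diff are in scope.
GLuint PickImageViewHandle(OpenGL::ImageView& view, Shader::TextureType type,
                           Shader::ImageFormat storage_format) {
    if (storage_format != Shader::ImageFormat::Typeless) {
        // Storage images may need an integer-typed view of the same texture.
        return view.StorageView(type, storage_format);
    }
    // Sampled textures use the per-type view created by SetupView/MakeView.
    return view.Handle(type);
}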
@ -9,6 +9,7 @@
#include <glad/glad.h>

#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@ -127,13 +128,12 @@ private:
    OGLTexture null_image_1d_array;
    OGLTexture null_image_cube_array;
    OGLTexture null_image_3d;
    OGLTexture null_image_rect;
    OGLTextureView null_image_view_1d;
    OGLTextureView null_image_view_2d;
    OGLTextureView null_image_view_2d_array;
    OGLTextureView null_image_view_cube;

    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
    std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};

class Image : public VideoCommon::ImageBase {
@ -154,8 +154,6 @@ public:
    void UploadMemory(const ImageBufferMap& map,
                      std::span<const VideoCommon::BufferImageCopy> copies);

    void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);

    void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);

    GLuint StorageHandle() noexcept;
@ -170,7 +168,6 @@ private:
    void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);

    OGLTexture texture;
    OGLBuffer buffer;
    OGLTextureView store_view;
    GLenum gl_internal_format = GL_NONE;
    GLenum gl_format = GL_NONE;
@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {

public:
    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
                       const VideoCommon::ImageViewInfo&, GPUVAddr);
    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                       const VideoCommon::ImageViewInfo& view_info);
    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);

    [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
        return views[static_cast<size_t>(query_type)];
    [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
                                     Shader::ImageFormat image_format);

    [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
        return views[static_cast<size_t>(handle_type)];
    }

    [[nodiscard]] GLuint DefaultHandle() const noexcept {
@ -196,15 +200,38 @@ public:
        return internal_format;
    }

private:
    void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
                   const VideoCommon::ImageViewInfo& info,
                   VideoCommon::SubresourceRange view_range);
    [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
        return gpu_addr;
    }

    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
    [[nodiscard]] u32 BufferSize() const noexcept {
        return buffer_size;
    }

private:
    struct StorageViews {
        std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
        std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
    };

    void SetupView(Shader::TextureType view_type);

    GLuint MakeView(Shader::TextureType view_type, GLenum view_format);

    std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
    std::vector<OGLTextureView> stored_views;
    GLuint default_handle = 0;
    std::unique_ptr<StorageViews> storage_views;
    GLenum internal_format = GL_NONE;
    GLuint default_handle = 0;
    GPUVAddr gpu_addr = 0;
    u32 buffer_size = 0;
    GLuint original_texture = 0;
    int num_samples = 0;
    VideoCommon::SubresourceRange flat_range;
    VideoCommon::SubresourceRange full_range;
    std::array<u8, 4> swizzle{};
    bool set_object_label = false;
    bool is_render_target = false;
};

class ImageAlloc : public VideoCommon::ImageAllocBase {};
@ -5,12 +5,120 @@
#pragma once

#include <glad/glad.h>

#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"

namespace OpenGL::MaxwellToGL {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;

struct FormatTuple {
    GLenum internal_format;
    GLenum format = GL_NONE;
    GLenum type = GL_NONE;
};

constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // A8B8G8R8_UNORM
    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                               // A8B8G8R8_SNORM
    {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                            // A8B8G8R8_SINT
    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                  // A8B8G8R8_UINT
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                     // R5G6B5_UNORM
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                 // B5G6R5_UNORM
    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM
    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
    {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM
    {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT
    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                      // R8_UINT
    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                             // R16G16B16A16_FLOAT
    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                          // R16G16B16A16_UNORM
    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                             // R16G16B16A16_SNORM
    {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                          // R16G16B16A16_SINT
    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                // R16G16B16A16_UINT
    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},     // B10G11R11_FLOAT
    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                  // R32G32B32A32_UINT
    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                               // BC1_RGBA_UNORM
    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                               // BC2_UNORM
    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                               // BC3_UNORM
    {GL_COMPRESSED_RED_RGTC1},                                        // BC4_UNORM
    {GL_COMPRESSED_SIGNED_RED_RGTC1},                                 // BC4_SNORM
    {GL_COMPRESSED_RG_RGTC2},                                         // BC5_UNORM
    {GL_COMPRESSED_SIGNED_RG_RGTC2},                                  // BC5_SNORM
    {GL_COMPRESSED_RGBA_BPTC_UNORM},                                  // BC7_UNORM
    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT
    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT
    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM
    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT
    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT
    {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT
    {GL_RG32I, GL_RG_INTEGER, GL_INT},                                // R32G32_SINT
    {GL_R32F, GL_RED, GL_FLOAT},                                      // R32_FLOAT
    {GL_R16F, GL_RED, GL_HALF_FLOAT},                                 // R16_FLOAT
    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                              // R16_UNORM
    {GL_R16_SNORM, GL_RED, GL_SHORT},                                 // R16_SNORM
    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                    // R16_UINT
    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                              // R16_SINT
    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                              // R16G16_UNORM
    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                 // R16G16_FLOAT
    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                    // R16G16_UINT
    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                              // R16G16_SINT
    {GL_RG16_SNORM, GL_RG, GL_SHORT},                                 // R16G16_SNORM
    {GL_RGB32F, GL_RGB, GL_FLOAT},                                    // R32G32B32_FLOAT
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},          // A8B8G8R8_SRGB
    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                // R8G8_UNORM
    {GL_RG8_SNORM, GL_RG, GL_BYTE},                                   // R8G8_SNORM
    {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                // R8G8_SINT
    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                      // R8G8_UINT
    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                      // R32G32_UINT
    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16X16_FLOAT
    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                      // R32_UINT
    {GL_R32I, GL_RED_INTEGER, GL_INT},                                // R32_SINT
    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM
    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM
    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                            // BC7_SRGB
    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},               // A4B4G4R4_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                        // ASTC_2D_4X4_SRGB
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                        // ASTC_2D_8X8_SRGB
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                        // ASTC_2D_8X5_SRGB
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                        // ASTC_2D_5X4_SRGB
    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                                // ASTC_2D_5X5_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                        // ASTC_2D_5X5_SRGB
    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                               // ASTC_2D_10X8_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                       // ASTC_2D_10X8_SRGB
    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                                // ASTC_2D_6X6_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                        // ASTC_2D_6X6_SRGB
    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                              // ASTC_2D_10X10_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                      // ASTC_2D_10X10_SRGB
    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                              // ASTC_2D_12X12_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                      // ASTC_2D_12X12_SRGB
    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                                // ASTC_2D_8X6_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                        // ASTC_2D_8X6_SRGB
    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                                // ASTC_2D_6X5_UNORM
    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                        // ASTC_2D_6X5_SRGB
    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                // E5B9G9R9_FLOAT
    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},            // D32_FLOAT
    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},    // D16_UNORM
    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // D24_UNORM_S8_UINT
    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // S8_UINT_D24_UNORM
    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
     GL_FLOAT_32_UNSIGNED_INT_24_8_REV},                              // D32_FLOAT_S8_UINT
}};

inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
    ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
    return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}

inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
    switch (attrib.type) {
    case Maxwell::VertexAttribute::Type::UnsignedNorm:
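Editor's note (not in the diff): FORMAT_TABLE is indexed directly by PixelFormat, so GetFormatTuple is just a bounds-checked array lookup. A small usage sketch, with the expected values taken from the first table entry above:

// Query the GL upload triple for a guest pixel format (A8B8G8R8_UNORM here).
using VideoCore::Surface::PixelFormat;
const auto& tuple = OpenGL::MaxwellToGL::GetFormatTuple(PixelFormat::A8B8G8R8_UNORM);
// tuple.internal_format == GL_RGBA8
// tuple.format          == GL_RGBA
// tuple.type            == GL_UNSIGNED_INT_8_8_8_8_REV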
@ -25,6 +25,7 @@
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
    }
    AddTelemetryFields();
    InitOpenGLObjects();

    // Initialize default attributes to match hardware's disabled attributes
    GLint max_attribs{};
    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
    for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
        glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
    }
    // Enable seamless cubemaps when per texture parameters are not available
    if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
        glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
    }
    // Enable unified vertex attributes and query vertex buffer address when the driver supports it
    if (device.HasVertexBufferUnifiedMemory()) {
        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
        glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);

        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
                                         &vertex_buffer_address);
    }
}

RendererOpenGL::~RendererOpenGL() = default;
@ -230,18 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color

void RendererOpenGL::InitOpenGLObjects() {
    // Create shader programs
    OGLShader vertex_shader;
    vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);

    OGLShader fragment_shader;
    fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);

    vertex_program.Create(true, false, vertex_shader.handle);
    fragment_program.Create(true, false, fragment_shader.handle);

    pipeline.Create();
    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
    present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
    present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);

    // Generate presentation sampler
    present_sampler.Create();
@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {

    // Clear screen to black
    LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);

    // Enable seamless cubemaps when per texture parameters are not available
    if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
        glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
    }

    // Enable unified vertex attributes and query vertex buffer address when the driver supports it
    if (device.HasVertexBufferUnifiedMemory()) {
        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
        glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);

        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
                                         &vertex_buffer_address);
    }
}

void RendererOpenGL::AddTelemetryFields() {
@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    // Set projection matrix
    const std::array ortho_matrix =
        MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
    glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
                                std::data(ortho_matrix));
    program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
    glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
                                ortho_matrix.data());

    const auto& texcoords = screen_info.display_texcoords;
    auto left = texcoords.left;
@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    state_tracker.NotifyClipControl();
    state_tracker.NotifyAlphaTest();

    program_manager.BindHostPipeline(pipeline.handle);

    state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
    glEnable(GL_CULL_FACE);
    if (screen_info.display_srgb) {
@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    glClear(GL_COLOR_BUFFER_BIT);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    program_manager.RestoreGuestPipeline();
    // TODO
    // program_manager.RestoreGuestPipeline();
}

void RendererOpenGL::RenderScreenshot() {
@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"

namespace Core {
@ -111,9 +110,8 @@ private:
    // OpenGL object IDs
    OGLSampler present_sampler;
    OGLBuffer vertex_buffer;
    OGLProgram vertex_program;
    OGLProgram fragment_program;
    OGLPipeline pipeline;
    OGLProgram present_vertex;
    OGLProgram present_fragment;
    OGLFramebuffer screenshot_framebuffer;

    // GPU address of the vertex buffer
@ -16,8 +16,8 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;

namespace {

OGLProgram MakeProgram(std::string_view source) {
    OGLShader shader;
    shader.Create(source, GL_COMPUTE_SHADER);

    OGLProgram program;
    program.Create(true, false, shader.handle);
    return program;
    return CreateProgram(source, GL_COMPUTE_SHADER);
}

size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
    return static_cast<size_t>(copy.extent.width * copy.extent.height *
                               copy.src_subresource.num_layers);
}

} // Anonymous namespace

UtilShaders::UtilShaders(ProgramManager& program_manager_)
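Editor's note (not part of the diff): MakeProgram now delegates to CreateProgram from gl_shader_util, and compute programs are bound through ProgramManager::BindComputeProgram instead of BindHostCompute. A minimal sketch under the assumption that a ProgramManager named program_manager is in scope; the shader source is a trivial placeholder, not one of the generated host shader headers:

// Build and bind a compute program the way the util shaders now do.
constexpr std::string_view KERNEL_SOURCE = R"(#version 430
layout(local_size_x = 1) in;
void main() {})";

OGLProgram program = CreateProgram(KERNEL_SOURCE, GL_COMPUTE_SHADER);
program_manager.BindComputeProgram(program.handle);
glDispatchCompute(1, 1, 1);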
@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
        .width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
        .height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
    };
    program_manager.BindHostCompute(astc_decoder_program.handle);
    program_manager.BindComputeProgram(astc_decoder_program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
    program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
    program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
    program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

    const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
    UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
                         "Non-power of two images are not implemented");

    program_manager.BindHostCompute(pitch_unswizzle_program.handle);
    program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
    glUniform2ui(LOC_ORIGIN, 0, 0);
    glUniform2i(LOC_DESTINATION, 0, 0);
@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
    static constexpr GLuint LOC_SRC_OFFSET = 0;
    static constexpr GLuint LOC_DST_OFFSET = 1;

    program_manager.BindHostCompute(copy_bc4_program.handle);
    program_manager.BindComputeProgram(copy_bc4_program.handle);

    for (const ImageCopy& copy : copies) {
        ASSERT(copy.src_subresource.base_layer == 0);
@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
        break;
    case 4: {
        // BGRA8 copy
        program_manager.BindHostCompute(copy_bgra_program.handle);
        program_manager.BindComputeProgram(copy_bgra_program.handle);
        constexpr GLenum FORMAT = GL_RGBA8;
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);