Merge pull request #6585 from ameerj/hades

Shader Decompiler Rewrite
bunnei 2021-07-25 11:39:04 -07:00 committed by GitHub
commit 98b26b6e12
428 changed files with 49376 additions and 27255 deletions

File diff suppressed because it is too large.

View file

@ -1,29 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <string_view>
#include "common/common_types.h"
namespace Tegra::Engines {
enum class ShaderType : u32;
}
namespace VideoCommon::Shader {
class ShaderIR;
class Registry;
} // namespace VideoCommon::Shader
namespace OpenGL {
class Device;
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier);
} // namespace OpenGL

View file

@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <span>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
using VideoCore::Surface::PixelFormat;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
switch (gl_format) {
case GL_RGBA8_SNORM:
return GL_RGBA8;
case GL_R8_SNORM:
return GL_R8;
case GL_RGBA16_SNORM:
return GL_RGBA16;
case GL_R16_SNORM:
return GL_R16;
case GL_RG16_SNORM:
return GL_RG16;
case GL_RG8_SNORM:
return GL_RG8;
default:
return gl_format;
}
}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
glMakeNamedBufferResidentNV(buffer.handle, access);
}
GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
return offset == view.offset && size == view.size && format == view.format;
})};
if (it != views.end()) {
return it->texture.handle;
}
OGLTexture texture;
texture.Create(GL_TEXTURE_BUFFER);
const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
const GLenum texture_format{GetTextureBufferFormat(gl_format)};
if (texture_format != gl_format) {
LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
}
glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
views.push_back({
.offset = offset,
.size = size,
.format = format,
.texture = std::move(texture),
});
return views.back().texture.handle;
}
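A minimal, self-contained sketch of the view-caching pattern Buffer::View uses above: a linear scan over previously created views keyed on (offset, size, format), creating and caching a new entry only on a miss. The View struct, GetView and the fake handle allocation below are illustrative stand-ins, not yuzu API.

#include <algorithm>
#include <cstdint>
#include <vector>

namespace sketch {
struct View {
    uint32_t offset;
    uint32_t size;
    uint32_t format; // stand-in for VideoCore::Surface::PixelFormat
    uint32_t handle; // stand-in for the OGLTexture handle
};

// Return the cached handle when (offset, size, format) was requested before;
// otherwise "create" a view, cache it and return the new handle.
inline uint32_t GetView(std::vector<View>& views, uint32_t offset, uint32_t size,
                        uint32_t format) {
    const auto it = std::ranges::find_if(views, [=](const View& view) {
        return offset == view.offset && size == view.size && format == view.format;
    });
    if (it != views.end()) {
        return it->handle;
    }
    const auto handle = static_cast<uint32_t>(views.size()) + 1; // fake allocation
    views.push_back({offset, size, format, handle});
    return handle;
}
} // namespace sketch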
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
if (use_assembly_shaders) {
if (use_storage_buffers) {
const GLuint base_binding = graphics_base_storage_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
} else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
if (use_assembly_shaders) {
if (use_storage_buffers) {
if (size != 0) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
}
} else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else if (size == 0) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
PixelFormat format) {
*texture_handles++ = buffer.View(offset, size, format);
}
void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
*image_handles++ = buffer.View(offset, size, format);
}
} // namespace OpenGL

View file

@ -32,6 +32,8 @@ public:
void MakeResident(GLenum access) noexcept;
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
@ -41,9 +43,17 @@ public:
}
private:
struct BufferView {
u32 offset;
u32 size;
VideoCore::Surface::PixelFormat format;
OGLTexture texture;
};
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
std::vector<BufferView> views;
};
class BufferCacheRuntime {
@ -75,17 +85,21 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
if (use_assembly_shaders) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(size));
glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
}
}
@ -103,7 +117,7 @@ public:
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@ -118,6 +132,27 @@ public:
return has_fast_buffer_sub_data;
}
[[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
return !use_assembly_shaders;
}
void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_uniform_bindings = bindings;
}
void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_storage_bindings = bindings;
}
void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
texture_handles = texture_handles_;
image_handles = image_handles_;
}
void SetEnableStorageBuffers(bool use_storage_buffers_) {
use_storage_buffers = use_storage_buffers_;
}
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@ -131,8 +166,15 @@ private:
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
bool use_storage_buffers = false;
u32 max_attributes = 0;
std::array<GLuint, 5> graphics_base_uniform_bindings{};
std::array<GLuint, 5> graphics_base_storage_bindings{};
GLuint* texture_handles = nullptr;
GLuint* image_handles = nullptr;
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@ -156,6 +198,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
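The SetImagePointers/BindTextureBuffer/BindImageBuffer additions above replace per-stage binding lookups with caller-owned output arrays: the pipeline hands the runtime two raw pointers, and every texture- or image-buffer bind writes the next view handle through them in binding order. A self-contained sketch of that pattern follows; Runtime, the uint32_t handles and Example are illustrative stand-ins for the real BufferCacheRuntime types.

#include <array>
#include <cstdint>

namespace sketch {
struct Runtime {
    uint32_t* texture_handles = nullptr;
    uint32_t* image_handles = nullptr;

    // Mirrors BufferCacheRuntime::SetImagePointers: the caller owns the storage.
    void SetImagePointers(uint32_t* textures, uint32_t* images) {
        texture_handles = textures;
        image_handles = images;
    }
    // Mirrors BindTextureBuffer/BindImageBuffer: each bind appends one handle.
    void BindTextureBuffer(uint32_t view_handle) { *texture_handles++ = view_handle; }
    void BindImageBuffer(uint32_t view_handle) { *image_handles++ = view_handle; }
};

inline void Example() {
    std::array<uint32_t, 64> textures{};
    std::array<uint32_t, 8> images{};
    Runtime runtime;
    runtime.SetImagePointers(textures.data(), images.data());
    runtime.BindTextureBuffer(10); // textures[0] == 10
    runtime.BindImageBuffer(20);   // images[0] == 20
}
} // namespace sketch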

View file

@ -0,0 +1,209 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "common/cityhash.h"
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
template <typename Range>
u32 AccumulateCount(const Range& range) {
u32 num{};
for (const auto& desc : range) {
num += desc.count;
}
return num;
}
size_t ComputePipelineKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
}
bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
return std::memcmp(this, &rhs, sizeof *this) == 0;
}
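ComputePipelineKey (and GraphicsPipelineKey later in this diff) are hashed and compared byte-wise, which is only sound because the structs have no padding or other representation ambiguity; the static_asserts in the headers enforce exactly that. A small self-contained illustration of the same pattern, with an FNV-1a loop standing in for Common::CityHash64 and a simplified Key type that is an assumption of this sketch, not the real layout:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <type_traits>

namespace sketch {
struct Key {
    uint64_t unique_hash;
    uint32_t shared_memory_size;
    uint32_t workgroup_x;
    uint32_t workgroup_y;
    uint32_t workgroup_z;

    size_t Hash() const noexcept {
        // FNV-1a over the object bytes; yuzu uses Common::CityHash64 instead.
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        uint64_t hash = 0xcbf29ce484222325ULL;
        for (size_t i = 0; i < sizeof *this; ++i) {
            hash = (hash ^ bytes[i]) * 0x100000001b3ULL;
        }
        return static_cast<size_t>(hash);
    }
    bool operator==(const Key& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
// Byte-wise hashing and memcmp equality require every bit of the object to be
// meaningful; a struct with padding would fail this check.
static_assert(std::has_unique_object_representations_v<Key>);
} // namespace sketch

namespace std {
template <>
struct hash<sketch::Key> {
    size_t operator()(const sketch::Key& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std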
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::KeplerCompute& kepler_compute_,
ProgramManager& program_manager_, const Shader::Info& info_,
std::string code, std::vector<u32> code_v)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
break;
case Settings::ShaderBackend::GLASM:
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
break;
case Settings::ShaderBackend::SPIRV:
source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
break;
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
ASSERT(num_textures <= MAX_TEXTURES);
const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
ASSERT(num_images <= MAX_IMAGES);
const bool is_glasm{assembly_program.handle != 0};
const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
use_storage_buffers =
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
}
void ComputePipeline::Configure() {
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
desc.is_written);
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
std::array<GLuint, MAX_TEXTURES> samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
GLsizei texture_binding{};
GLsizei image_binding{};
const auto& qmd{kepler_compute.launch_description};
const auto& cbufs{qmd.const_buffer_config};
const bool via_header_index{qmd.linked_tsc != 0};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
samplers[sampler_binding++] = 0;
}
}
std::ranges::for_each(info.image_buffer_descriptors, add_image);
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers[sampler_binding++] = sampler->Handle();
}
}
std::ranges::for_each(info.image_descriptors, add_image);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
program_manager.BindComputeProgram(source_program.handle);
}
buffer_cache.UnbindComputeTextureBuffers();
size_t texbuf_index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++texbuf_index;
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
buffer_cache.UpdateComputeBuffers();
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
textures[texture_binding++] = image_view.Handle(desc.type);
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
}
}
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
glBindSamplers(0, sampler_binding, samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
}
} // namespace OpenGL

View file

@ -0,0 +1,93 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <type_traits>
#include <utility>
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines {
class KeplerCompute;
}
namespace Shader {
struct Info;
}
namespace OpenGL {
class Device;
class ProgramManager;
struct ComputePipelineKey {
u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
size_t Hash() const noexcept;
bool operator==(const ComputePipelineKey&) const noexcept;
bool operator!=(const ComputePipelineKey& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::KeplerCompute& kepler_compute_,
ProgramManager& program_manager_, const Shader::Info& info_,
std::string code, std::vector<u32> code_v);
void Configure();
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
return writes_global_memory;
}
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::KeplerCompute& kepler_compute;
ProgramManager& program_manager;
Shader::Info info;
OGLProgram source_program;
OGLAssemblyProgram assembly_program;
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
u32 num_texture_buffers{};
u32 num_image_buffers{};
bool use_storage_buffers{};
bool writes_global_memory{};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::ComputePipelineKey> {
size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View file

@ -17,39 +17,17 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
};
constexpr std::array LIMIT_SAMPLERS = {
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
};
constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
};
template <typename T>
T GetInteger(GLenum pname) {
@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
ASSERT(num >= amount);
if (limit) {
amount = std::min(amount, GetInteger<u32>(*limit));
}
num -= amount;
return std::exchange(base, base + amount);
}
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(),
[](GLenum pname) { return GetInteger<u32>(pname); });
std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Shader::MaxStageTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
return max;
}
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
u32 num_ubos = total_ubos - ReservedUniformBlocks;
u32 num_ssbos = total_ssbos;
u32 num_samplers = total_samplers;
u32 base_ubo = ReservedUniformBlocks;
u32 base_ssbo = 0;
u32 base_samplers = 0;
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages,
LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
u32 base_images = 0;
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
// fragment stage, and at least 1 for the rest of the stages.
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
// Reserve the other image bindings.
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
if (stage == 4) {
continue;
}
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
bindings[5] = {0, 0, 0, 0};
return bindings;
}
bool IsASTCSupported() {
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
static constexpr std::array formats = {
static constexpr std::array targets{
GL_TEXTURE_2D,
GL_TEXTURE_2D_ARRAY,
};
static constexpr std::array formats{
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
@ -172,11 +87,10 @@ bool IsASTCSupported() {
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
static constexpr std::array required_support = {
static constexpr std::array required_support{
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
@ -223,14 +137,13 @@ Device::Device() {
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
max_uniform_buffers = BuildMaxUniformBuffers();
base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@ -243,18 +156,30 @@ Device::Device() {
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_derivative_control = GLAD_GL_ARB_derivative_control;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
shader_backend = Settings::values.shader_backend.GetValue();
use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
shader_backend = Settings::ShaderBackend::GLSL;
}
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux));
@ -265,11 +190,6 @@ Device::Device() {
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);
if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
}
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
}
@ -325,22 +245,6 @@ std::string Device::GetVendorName() const {
return vendor_name;
}
Device::Device(std::nullptr_t) {
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
uniform_buffer_alignment = 4;
shader_storage_alignment = 4;
max_vertex_attributes = 16;
max_varyings = 15;
max_compute_shared_memory_size = 0x10000;
has_warp_intrinsics = true;
has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
has_texture_shadow_lod = true;
has_variable_aoffi = true;
has_depth_buffer_float = true;
}
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.

View file

@ -6,34 +6,22 @@
#include <cstddef>
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
#include "shader_recompiler/stage.h"
namespace Settings {
enum class ShaderBackend : u32;
};
namespace OpenGL {
class Device {
public:
struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
u32 image{};
};
explicit Device();
explicit Device(std::nullptr_t);
[[nodiscard]] std::string GetVendorName() const;
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
}
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
return base_bindings[stage_index];
}
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
return GetBaseBindings(static_cast<std::size_t>(shader_type));
u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
return max_uniform_buffers[static_cast<size_t>(stage)];
}
size_t GetUniformBufferAlignment() const {
@ -56,6 +44,10 @@ public:
return max_compute_shared_memory_size;
}
u32 GetMaxGLASMStorageBufferBlocks() const {
return max_glasm_storage_buffer_blocks;
}
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@ -108,6 +100,10 @@ public:
return has_nv_viewport_array2;
}
bool HasDerivativeControl() const {
return has_derivative_control;
}
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
@ -128,18 +124,52 @@ public:
return has_depth_buffer_float;
}
bool HasGeometryShaderPassthrough() const {
return has_geometry_shader_passthrough;
}
bool HasNvGpuShader5() const {
return has_nv_gpu_shader_5;
}
bool HasShaderInt64() const {
return has_shader_int64;
}
bool HasAmdShaderHalfFloat() const {
return has_amd_shader_half_float;
}
bool HasSparseTexture2() const {
return has_sparse_texture_2;
}
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
bool NeedsFastmathOff() const {
return need_fastmath_off;
}
Settings::ShaderBackend GetShaderBackend() const {
return shader_backend;
}
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
std::string vendor_name;
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
u32 max_glasm_storage_buffer_blocks{};
Settings::ShaderBackend shader_backend{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
@ -153,11 +183,21 @@ private:
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool has_derivative_control{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
bool has_depth_buffer_float{};
bool has_geometry_shader_passthrough{};
bool has_nv_gpu_shader_5{};
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
std::string vendor_name;
};
} // namespace OpenGL

View file

@ -0,0 +1,572 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <string>
#include <vector>
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache.h"
#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
#else
#define LAMBDA_FORCEINLINE
#endif
namespace OpenGL {
namespace {
using Shader::ImageBufferDescriptor;
using Shader::ImageDescriptor;
using Shader::TextureBufferDescriptor;
using Shader::TextureDescriptor;
using Tegra::Texture::TexturePair;
using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
template <typename Range>
u32 AccumulateCount(const Range& range) {
u32 num{};
for (const auto& desc : range) {
num += desc.count;
}
return num;
}
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
return GL_VERTEX_SHADER;
case 1:
return GL_TESS_CONTROL_SHADER;
case 2:
return GL_TESS_EVALUATION_SHADER;
case 3:
return GL_GEOMETRY_SHADER;
case 4:
return GL_FRAGMENT_SHADER;
}
UNREACHABLE_MSG("{}", stage_index);
return GL_NONE;
}
GLenum AssemblyStage(size_t stage_index) {
switch (stage_index) {
case 0:
return GL_VERTEX_PROGRAM_NV;
case 1:
return GL_TESS_CONTROL_PROGRAM_NV;
case 2:
return GL_TESS_EVALUATION_PROGRAM_NV;
case 3:
return GL_GEOMETRY_PROGRAM_NV;
case 4:
return GL_FRAGMENT_PROGRAM_NV;
}
UNREACHABLE_MSG("{}", stage_index);
return GL_NONE;
}
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
const u8 index = location / 4;
if (index >= 8 && index <= 39) {
return {GL_GENERIC_ATTRIB_NV, index - 8};
}
if (index >= 48 && index <= 55) {
return {GL_TEXTURE_COORD_NV, index - 48};
}
switch (index) {
case 7:
return {GL_POSITION, 0};
case 40:
return {GL_PRIMARY_COLOR_NV, 0};
case 41:
return {GL_SECONDARY_COLOR_NV, 0};
case 42:
return {GL_BACK_PRIMARY_COLOR_NV, 0};
case 43:
return {GL_BACK_SECONDARY_COLOR_NV, 0};
}
UNIMPLEMENTED_MSG("index={}", index);
return {GL_POSITION, 0};
}
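For reference, a few worked mappings through TransformFeedbackEnum above (the index is simply location / 4; these follow directly from the range checks and the switch, no extra cases are implied):

// location 28  -> index 7  -> {GL_POSITION, 0}
// location 32  -> index 8  -> {GL_GENERIC_ATTRIB_NV, 0}    (first generic attribute)
// location 156 -> index 39 -> {GL_GENERIC_ATTRIB_NV, 31}   (last generic attribute)
// location 192 -> index 48 -> {GL_TEXTURE_COORD_NV, 0}     (first texture coordinate set)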
template <typename Spec>
bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
return false;
}
const auto& info{stage_infos[stage]};
if constexpr (!Spec::has_storage_buffers) {
if (!info.storage_buffers_descriptors.empty()) {
return false;
}
}
if constexpr (!Spec::has_texture_buffers) {
if (!info.texture_buffer_descriptors.empty()) {
return false;
}
}
if constexpr (!Spec::has_image_buffers) {
if (!info.image_buffer_descriptors.empty()) {
return false;
}
}
if constexpr (!Spec::has_images) {
if (!info.image_descriptors.empty()) {
return false;
}
}
}
return true;
}
using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
template <typename Spec, typename... Specs>
ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
if constexpr (sizeof...(Specs) > 0) {
if (!Passes<Spec>(stage_infos, enabled_mask)) {
return FindSpec<Specs...>(stage_infos, enabled_mask);
}
}
return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
}
struct SimpleVertexFragmentSpec {
static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
static constexpr bool has_storage_buffers = false;
static constexpr bool has_texture_buffers = false;
static constexpr bool has_image_buffers = false;
static constexpr bool has_images = false;
};
struct SimpleVertexSpec {
static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
static constexpr bool has_storage_buffers = false;
static constexpr bool has_texture_buffers = false;
static constexpr bool has_image_buffers = false;
static constexpr bool has_images = false;
};
struct DefaultSpec {
static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
static constexpr bool has_storage_buffers = true;
static constexpr bool has_texture_buffers = true;
static constexpr bool has_image_buffers = true;
static constexpr bool has_images = true;
};
ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
}
} // Anonymous namespace
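The Passes/FindSpec/ConfigureFunc machinery above picks, once per pipeline, the most specialized ConfigureImpl instantiation whose compile-time constraints hold for the pipeline's stage infos; the last spec in the list (DefaultSpec) always matches and acts as the fallback. A minimal self-contained sketch of the same dispatch technique, with a single has_images constraint and string names as illustrative stand-ins:

#include <array>
#include <cstdio>

namespace sketch {
struct Info {
    bool uses_images = false;
};
using Func = void (*)();

template <typename Spec>
bool Passes(const std::array<Info, 5>& infos) {
    for (const Info& info : infos) {
        if (!Spec::has_images && info.uses_images) {
            return false;
        }
    }
    return true;
}

// Walk the spec list and return the first (most specialized) spec that passes;
// the last spec is returned unconditionally.
template <typename Spec, typename... Specs>
Func FindSpec(const std::array<Info, 5>& infos) {
    if constexpr (sizeof...(Specs) > 0) {
        if (!Passes<Spec>(infos)) {
            return FindSpec<Specs...>(infos);
        }
    }
    return [] { std::puts(Spec::name); };
}

struct SimpleSpec {
    static constexpr bool has_images = false;
    static constexpr const char* name = "simple";
};
struct DefaultSpec {
    static constexpr bool has_images = true;
    static constexpr const char* name = "default";
};

inline Func Choose(const std::array<Info, 5>& infos) {
    return FindSpec<SimpleSpec, DefaultSpec>(infos);
}
} // namespace sketch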
GraphicsPipeline::GraphicsPipeline(
const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
const GraphicsPipelineKey& key_)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
state_tracker{state_tracker_}, key{key_} {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
}
u32 num_textures{};
u32 num_images{};
u32 num_storage_buffers{};
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
auto& info{stage_infos[stage]};
if (infos[stage]) {
info = *infos[stage];
enabled_stages_mask |= 1u << stage;
}
if (stage < 4) {
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
}
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
num_texture_buffers[stage] += num_tex_buffer_bindings;
num_textures += num_tex_buffer_bindings;
const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
num_image_buffers[stage] += num_img_buffers_bindings;
num_images += num_img_buffers_bindings;
num_textures += AccumulateCount(info.texture_descriptors);
num_images += AccumulateCount(info.image_descriptors);
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
const bool assembly_shaders{assembly_programs[0].handle != 0};
use_storage_buffers =
!assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory &= !use_storage_buffers;
configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
if (key.xfb_enabled && device.UseAssemblyShaders()) {
GenerateTransformFeedbackState();
}
const bool in_parallel = thread_worker != nullptr;
const auto backend = device.GetShaderBackend();
auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
case Settings::ShaderBackend::GLSL:
if (!sources[stage].empty()) {
source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
}
break;
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
if (in_parallel) {
// Make sure program is built before continuing when building in parallel
glGetString(GL_PROGRAM_ERROR_STRING_NV);
}
}
break;
case Settings::ShaderBackend::SPIRV:
if (!sources_spirv[stage].empty()) {
source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
}
break;
}
}
if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
// Make sure programs have built if we are building shaders in parallel
for (OGLProgram& program : source_programs) {
if (program.handle != 0) {
GLint status{};
glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
}
}
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
is_built = true;
built_condvar.notify_one();
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
} else {
func(nullptr);
}
}
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
std::array<GLuint, MAX_TEXTURES> samplers;
size_t image_view_index{};
GLsizei sampler_binding{};
texture_cache.SynchronizeGraphicsDescriptors();
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
const auto& regs{maxwell3d.regs};
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
const Shader::Info& info{stage_infos[stage]};
buffer_cache.UnbindGraphicsStorageBuffers(stage);
if constexpr (Spec::has_storage_buffers) {
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
desc.cbuf_offset, desc.is_written);
++ssbo_index;
}
}
const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(cbufs[desc.cbuf_index].enabled);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
second_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
const u32 raw{lhs_raw | rhs_raw};
return TexturePair(raw, via_header_index);
}
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
}
}};
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
samplers[sampler_binding++] = 0;
}
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc);
}
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_binding++] = sampler->Handle();
}
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
add_image(desc);
}
}
}};
if constexpr (Spec::enabled_stages[0]) {
config_stage(0);
}
if constexpr (Spec::enabled_stages[1]) {
config_stage(1);
}
if constexpr (Spec::enabled_stages[2]) {
config_stage(2);
}
if constexpr (Spec::enabled_stages[3]) {
config_stage(3);
}
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
const std::span indices_span(image_view_indices.data(), image_view_index);
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
ImageId* texture_buffer_index{image_view_ids.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
++texture_buffer_index;
}
}};
const Shader::Info& info{stage_infos[stage]};
buffer_cache.UnbindGraphicsTextureBuffers(stage);
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
add_buffer(desc);
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
add_buffer(desc);
}
}
for (const auto& desc : info.texture_descriptors) {
texture_buffer_index += desc.count;
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
texture_buffer_index += desc.count;
}
}
}};
if constexpr (Spec::enabled_stages[0]) {
bind_stage_info(0);
}
if constexpr (Spec::enabled_stages[1]) {
bind_stage_info(1);
}
if constexpr (Spec::enabled_stages[2]) {
bind_stage_info(2);
}
if constexpr (Spec::enabled_stages[3]) {
bind_stage_info(3);
}
if constexpr (Spec::enabled_stages[4]) {
bind_stage_info(4);
}
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
if (!is_built.load(std::memory_order::relaxed)) {
WaitForBuild();
}
if (assembly_programs[0].handle != 0) {
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
} else {
program_manager.BindSourcePrograms(source_programs);
}
const ImageId* views_it{image_view_ids.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
const auto prepare_stage{[&](size_t stage) {
buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
buffer_cache.BindHostStageBuffers(stage);
texture_binding += num_texture_buffers[stage];
image_binding += num_image_buffers[stage];
views_it += num_texture_buffers[stage];
views_it += num_image_buffers[stage];
const auto& info{stage_infos[stage]};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
textures[texture_binding++] = image_view.Handle(desc.type);
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
}
}
}};
if constexpr (Spec::enabled_stages[0]) {
prepare_stage(0);
}
if constexpr (Spec::enabled_stages[1]) {
prepare_stage(1);
}
if constexpr (Spec::enabled_stages[2]) {
prepare_stage(2);
}
if constexpr (Spec::enabled_stages[3]) {
prepare_stage(3);
}
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
glBindSamplers(0, sampler_binding, samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
}
void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
}
void GraphicsPipeline::GenerateTransformFeedbackState() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
GLint* cursor{xfb_attribs.data()};
GLint* current_stream{xfb_streams.data()};
for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
const auto& layout = key.xfb_state.layouts[feedback];
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
if (layout.varying_count == 0) {
continue;
}
*current_stream = static_cast<GLint>(feedback);
if (current_stream != xfb_streams.data()) {
// When stepping one stream, push the expected token
cursor[0] = GL_NEXT_BUFFER_NV;
cursor[1] = 0;
cursor[2] = 0;
cursor += XFB_ENTRY_STRIDE;
}
++current_stream;
const auto& locations = key.xfb_state.varyings[feedback];
std::optional<u8> current_index;
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
const u8 location = locations[offset];
const u8 index = location / 4;
if (current_index == index) {
// Increase number of components of the previous attachment
++cursor[-2];
continue;
}
current_index = index;
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
cursor[1] = 1;
cursor += XFB_ENTRY_STRIDE;
}
}
num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
}
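GenerateTransformFeedbackState above emits a flat stream of {attribute, component count, index} triplets for glTransformFeedbackStreamAttribsNV, inserting a {GL_NEXT_BUFFER_NV, 0, 0} triplet when stepping to the next buffer and merging consecutive components that map to the same hardware index into a single triplet with a larger count. A worked example, under the assumption that buffer 0 captures locations 28..31 (the four gl_Position components) followed by location 32 (generic attribute 0, one component):

// xfb_attribs = { GL_POSITION,          4, 0,     // locations 28..31 collapse into one entry
//                 GL_GENERIC_ATTRIB_NV, 1, 0 };   // location 32, a single component
// num_xfb_attribs == 2, num_xfb_strides == 1, and no GL_NEXT_BUFFER_NV entry is
// emitted because only the first buffer is populated.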
void GraphicsPipeline::WaitForBuild() {
std::unique_lock lock{built_mutex};
built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
}
} // namespace OpenGL
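is_built, built_mutex and built_condvar above form a one-shot completion handshake between the shader-building worker and the render thread: the fast path in ConfigureImpl is a relaxed atomic load, and the mutex/condition variable are only touched when a pipeline is used before its build has finished. A self-contained sketch of that handshake; Pipeline, Build and Example are illustrative stand-ins, and the sketch takes the lock around the flag store so the waiter cannot miss the notification:

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

namespace sketch {
class Pipeline {
public:
    void Build() { // runs on a worker thread
        // ... compile and link shader programs here ...
        {
            std::scoped_lock lock{built_mutex};
            is_built = true;
        }
        built_condvar.notify_one(); // publish completion to a possibly waiting render thread
    }

    void Configure() { // runs on the render thread
        if (!is_built.load(std::memory_order::relaxed)) {
            WaitForBuild(); // slow path: only taken when used before the build finished
        }
        // ... bind programs and resources here ...
    }

private:
    void WaitForBuild() {
        std::unique_lock lock{built_mutex};
        built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
    }

    std::mutex built_mutex;
    std::condition_variable built_condvar;
    std::atomic_bool is_built{false};
};

inline void Example() {
    Pipeline pipeline;
    std::thread worker{[&] { pipeline.Build(); }};
    pipeline.Configure(); // blocks only if the worker has not finished yet
    worker.join();
}
} // namespace sketch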

View file

@ -0,0 +1,169 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <utility>
#include "common/bit_field.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/transform_feedback.h"
namespace OpenGL {
namespace ShaderContext {
struct Context;
}
class Device;
class ProgramManager;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
struct GraphicsPipelineKey {
std::array<u64, 6> unique_hashes;
union {
u32 raw;
BitField<0, 1, u32> xfb_enabled;
BitField<1, 1, u32> early_z;
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
};
std::array<u32, 3> padding;
VideoCommon::TransformFeedbackState xfb_state;
size_t Hash() const noexcept {
return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
}
bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
return std::memcmp(this, &rhs, Size()) == 0;
}
bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
return !operator==(rhs);
}
[[nodiscard]] size_t Size() const noexcept {
if (xfb_enabled != 0) {
return sizeof(GraphicsPipelineKey);
} else {
return offsetof(GraphicsPipelineKey, padding);
}
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
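A note on Size() above: when transform feedback is disabled, hashing and equality deliberately stop at the padding member, so the comparatively large and then-irrelevant xfb_state does not influence key lookups; with transform feedback enabled, the full struct participates. Illustrative summary only, restating what the code above already says:

// xfb_enabled == 0  ->  Hash()/operator== cover bytes [0, offsetof(GraphicsPipelineKey, padding))
// xfb_enabled != 0  ->  Hash()/operator== cover all of sizeof(GraphicsPipelineKey), including xfb_state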
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
const GraphicsPipelineKey& key_);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
}
void ConfigureTransformFeedback() const {
if (num_xfb_attribs != 0) {
ConfigureTransformFeedbackImpl();
}
}
[[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
return key;
}
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
return writes_global_memory;
}
[[nodiscard]] bool IsBuilt() const noexcept {
return is_built.load(std::memory_order::relaxed);
}
template <typename Spec>
static auto MakeConfigureSpecFunc() {
return [](GraphicsPipeline* pipeline, bool is_indexed) {
pipeline->ConfigureImpl<Spec>(is_indexed);
};
}
private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
void ConfigureTransformFeedbackImpl() const;
void GenerateTransformFeedbackState();
void WaitForBuild();
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
ProgramManager& program_manager;
StateTracker& state_tracker;
const GraphicsPipelineKey key;
void (*configure_func)(GraphicsPipeline*, bool){};
std::array<OGLProgram, 5> source_programs;
std::array<OGLAssemblyProgram, 5> assembly_programs;
u32 enabled_stages_mask{};
std::array<Shader::Info, 5> stage_infos{};
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
std::array<u32, 5> base_uniform_bindings{};
std::array<u32, 5> base_storage_bindings{};
std::array<u32, 5> num_texture_buffers{};
std::array<u32, 5> num_image_buffers{};
bool use_storage_buffers{};
bool writes_global_memory{};
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
GLsizei num_xfb_strides{};
std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
std::mutex built_mutex;
std::condition_variable built_condvar;
std::atomic_bool is_built{false};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::GraphicsPipelineKey> {
size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View file

@ -23,7 +23,6 @@
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
@ -40,7 +39,6 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using GLvec4 = std::array<GLfloat, 4>;
using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
struct TextureHandle {
constexpr TextureHandle(u32 data, bool via_header_index) {
const Tegra::Texture::TextureHandle handle{data};
image = handle.tic_id;
sampler = via_header_index ? image : handle.tsc_id.Value();
}
u32 image;
u32 sampler;
};
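// Illustrative sketch (editor's note, not part of the diff): with via_header_index the TSC id
// is ignored and the TIC id selects both descriptors. For a raw handle whose tic_id is 5 and
// tsc_id is 9:
//   TextureHandle(raw, false) -> image = 5, sampler = 9
//   TextureHandle(raw, true)  -> image = 5, sampler = 5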
template <typename Engine, typename Entry>
TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
ShaderType shader_type, size_t index = 0) {
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
const u32 buffer_2 = entry.secondary_buffer;
const u32 offset_1 = entry.offset;
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
return TextureHandle(raw, via_header_index);
}
const u32 buffer = engine.GetBoundBuffer();
const u64 offset = (entry.offset + index) * sizeof(u32);
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair containing the first and third arguments of the ARB_transform_feedback3 token stream
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
const u8 index = location / 4;
if (index >= 8 && index <= 39) {
return {GL_GENERIC_ATTRIB_NV, index - 8};
}
if (index >= 48 && index <= 55) {
return {GL_TEXTURE_COORD_NV, index - 48};
}
switch (index) {
case 7:
return {GL_POSITION, 0};
case 40:
return {GL_PRIMARY_COLOR_NV, 0};
case 41:
return {GL_SECONDARY_COLOR_NV, 0};
case 42:
return {GL_BACK_PRIMARY_COLOR_NV, 0};
case 43:
return {GL_BACK_SECONDARY_COLOR_NV, 0};
}
UNIMPLEMENTED_MSG("index={}", index);
return {GL_POSITION, 0};
}
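// Illustrative sketch (editor's note, not part of the diff): sample mappings produced by the
// translation above, where index = location / 4.
//   TransformFeedbackEnum(28)  -> {GL_POSITION, 0}          (index 7)
//   TransformFeedbackEnum(40)  -> {GL_GENERIC_ATTRIB_NV, 2} (index 10, generic range 8-39)
//   TransformFeedbackEnum(192) -> {GL_TEXTURE_COORD_NV, 0}  (index 48, texcoord range 48-55)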
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::Buffer;
}
switch (entry.type) {
case Tegra::Shader::TextureType::Texture1D:
return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
case Tegra::Shader::TextureType::Texture2D:
return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
case Tegra::Shader::TextureType::Texture3D:
return ImageViewType::e3D;
case Tegra::Shader::TextureType::TextureCube:
return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
}
UNREACHABLE();
return ImageViewType::e2D;
}
ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
switch (entry.type) {
case Tegra::Shader::ImageType::Texture1D:
return ImageViewType::e1D;
case Tegra::Shader::ImageType::Texture1DArray:
return ImageViewType::e1DArray;
case Tegra::Shader::ImageType::Texture2D:
return ImageViewType::e2D;
case Tegra::Shader::ImageType::Texture2DArray:
return ImageViewType::e2DArray;
case Tegra::Shader::ImageType::Texture3D:
return ImageViewType::e3D;
case Tegra::Shader::ImageType::TextureBuffer:
return ImageViewType::Buffer;
}
UNREACHABLE();
return ImageViewType::e2D;
}
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
}
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
if (attrib.IsConstant()) {
if (attrib.constant) {
glDisableVertexAttribArray(gl_index);
continue;
}
@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
}
}
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
u32 clip_distances = 0;
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
image_view_indices.clear();
sampler_handles.clear();
texture_cache.SynchronizeGraphicsDescriptors();
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(0);
break;
case Maxwell::ShaderProgram::Fragment:
program_manager.UseFragmentShader(0);
break;
default:
break;
}
continue;
}
// Currently these stages are not supported in the OpenGL backend.
// TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl ||
program == Maxwell::ShaderProgram::TesselationEval) {
continue;
}
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
program_manager.UseVertexShader(program_handle);
break;
case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(program_handle);
break;
case Maxwell::ShaderProgram::Fragment:
program_manager.UseFragmentShader(program_handle);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
break;
}
// Stage indices are 0 - 5
const size_t stage = index == 0 ? 0 : index - 1;
shaders[stage] = shader;
SetupDrawTextures(shader, stage);
SetupDrawImages(shader, stage);
buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
buffer_cache.UnbindGraphicsStorageBuffers(stage);
u32 ssbo_index = 0;
for (const auto& buffer : shader->GetEntries().global_memory_entries) {
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
buffer.cbuf_offset, buffer.is_written);
++ssbo_index;
}
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable a
// clip distance only when it is written by a shader stage.
clip_distances |= shader->GetEntries().clip_distances;
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
++index;
}
}
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
buffer_cache.UpdateGraphicsBuffers(is_indexed);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
buffer_cache.BindHostGeometryBuffers(is_indexed);
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
const Shader* const shader = shaders[stage];
if (!shader) {
continue;
}
buffer_cache.BindHostStageBuffers(stage);
const auto& base = device.GetBaseBindings(stage);
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
texture_index, image_index);
}
}
void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
shader_cache.LoadDiskResources(title_id, stop_loading, callback);
}
void RasterizerOpenGL::Clear() {
@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncState();
// Setup shaders and their used resources.
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
}
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
SetupShaders(is_indexed);
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
pipeline->Configure(is_indexed);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
EndTransformFeedback();
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
BindComputeTextures(kernel);
const auto& entries = kernel->GetEntries();
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
buffer_cache.UnbindComputeStorageBuffers();
u32 ssbo_index = 0;
for (const auto& buffer : entries.global_memory_entries) {
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
buffer.is_written);
++ssbo_index;
void RasterizerOpenGL::DispatchCompute() {
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
if (!pipeline) {
return;
}
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
const auto& launch_desc = kepler_compute.launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
pipeline->Configure();
const auto& qmd{kepler_compute.launch_description};
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() {
}
void RasterizerOpenGL::FragmentBarrier() {
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
void RasterizerOpenGL::TiledCacheBarrier() {
@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
return;
}
num_queued_commands = 0;
// Make sure memory stored from the previous GL command stream is visible
// This is only needed on assembly shaders where we write to GPU memory with raw pointers
if (has_written_global_memory) {
has_written_global_memory = false;
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
}
glFlush();
}
@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
image_view_indices.clear();
sampler_handles.clear();
texture_cache.SynchronizeComputeDescriptors();
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
program_manager.BindCompute(kernel->GetHandle());
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
}
void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
GLuint base_image, size_t& image_view_index,
size_t& texture_index, size_t& image_index) {
const GLuint* const samplers = sampler_handles.data() + texture_index;
const GLuint* const textures = texture_handles.data() + texture_index;
const GLuint* const images = image_handles.data() + image_index;
const size_t num_samplers = entries.samplers.size();
for (const auto& sampler : entries.samplers) {
for (size_t i = 0; i < sampler.size; ++i) {
const ImageViewId image_view_id = image_view_ids[image_view_index++];
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
texture_handles[texture_index++] = handle;
}
}
const size_t num_images = entries.images.size();
for (size_t unit = 0; unit < num_images; ++unit) {
// TODO: Mark as modified
const ImageViewId image_view_id = image_view_ids[image_view_index++];
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
image_handles[image_index] = handle;
++image_index;
}
if (num_samplers > 0) {
glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
}
if (num_images > 0) {
glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
}
}
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
for (size_t index = 0; index < entry.size; ++index) {
const auto handle =
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
sampler_handles.push_back(sampler->Handle());
image_view_indices.push_back(handle.image);
}
}
}
void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : kernel->GetEntries().samplers) {
for (size_t i = 0; i < entry.size; ++i) {
const auto handle =
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
sampler_handles.push_back(sampler->Handle());
image_view_indices.push_back(handle.image);
}
}
}
void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<ShaderType>(stage_index);
const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
image_view_indices.push_back(handle.image);
}
}
void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : shader->GetEntries().images) {
const auto handle =
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
image_view_indices.push_back(handle.image);
}
}
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
void RasterizerOpenGL::SyncTransformFeedback() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
const auto& regs = maxwell3d.regs;
static constexpr std::size_t STRIDE = 3;
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
GLint* cursor = attribs.data();
GLint* current_stream = streams.data();
for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
const auto& layout = regs.tfb_layouts[feedback];
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
if (layout.varying_count == 0) {
continue;
}
*current_stream = static_cast<GLint>(feedback);
if (current_stream != streams.data()) {
// When stepping one stream, push the expected token
cursor[0] = GL_NEXT_BUFFER_NV;
cursor[1] = 0;
cursor[2] = 0;
cursor += STRIDE;
}
++current_stream;
const auto& locations = regs.tfb_varying_locs[feedback];
std::optional<u8> current_index;
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
const u8 location = locations[offset];
const u8 index = location / 4;
if (current_index == index) {
// Increase number of components of the previous attachment
++cursor[-2];
continue;
}
current_index = index;
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
cursor[1] = 1;
cursor += STRIDE;
}
}
const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
GL_INTERLEAVED_ATTRIBS);
}
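// Illustrative sketch (editor's note, not part of the diff): consecutive varyings that land on
// the same attribute index are folded into a single record by the ++cursor[-2] branch above.
// For example, varying locations {28, 29, 30, 31} (POSITION.xyzw) produce one
// {GL_POSITION, 4, 0} entry instead of four one-component entries.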
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
program->ConfigureTransformFeedback();
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
if (maxwell3d.regs.tfb_enabled != 0) {
glEndTransformFeedback();
}
glEndTransformFeedback();
}
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}

View file

@ -28,11 +28,9 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
namespace Core::Memory {
@ -81,7 +79,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@ -118,36 +116,11 @@ public:
return num_queued_commands > 0;
}
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
}
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
return async_shaders;
}
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
void BindComputeTextures(Shader* kernel);
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(const Shader* shader, size_t stage_index);
/// Configures the textures used in a compute shader.
void SetupComputeTextures(const Shader* kernel);
/// Configures images in a graphics shader.
void SetupDrawImages(const Shader* shader, size_t stage_index);
/// Configures images in a compute shader.
void SetupComputeImages(const Shader* shader);
/// Syncs state to match guest's
void SyncState();
@ -220,18 +193,12 @@ private:
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
void SetupShaders(bool is_indexed);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
@ -246,13 +213,11 @@ private:
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
ShaderCacheOpenGL shader_cache;
ShaderCache shader_cache;
QueryCache query_cache;
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
@ -260,7 +225,8 @@ private:
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Reset on flush.
std::size_t num_queued_commands = 0;
size_t num_queued_commands = 0;
bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};

View file

@ -83,18 +83,6 @@ void OGLSampler::Release() {
handle = 0;
}
void OGLShader::Create(std::string_view source, GLenum type) {
if (handle != 0) {
return;
}
if (source.empty()) {
return;
}
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = GLShader::LoadShader(source, type);
}
void OGLShader::Release() {
if (handle == 0)
return;
@ -104,21 +92,6 @@ void OGLShader::Release() {
handle = 0;
}
void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
const char* frag_shader, bool separable_program,
bool hint_retrievable) {
OGLShader vert, geo, frag;
if (vert_shader)
vert.Create(vert_shader, GL_VERTEX_SHADER);
if (geo_shader)
geo.Create(geo_shader, GL_GEOMETRY_SHADER);
if (frag_shader)
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
}
void OGLProgram::Release() {
if (handle == 0)
return;

View file

@ -8,7 +8,6 @@
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
@ -128,8 +127,6 @@ public:
return *this;
}
void Create(std::string_view source, GLenum type);
void Release();
GLuint handle = 0;
@ -151,17 +148,6 @@ public:
return *this;
}
template <typename... T>
void Create(bool separable_program, bool hint_retrievable, T... shaders) {
if (handle != 0)
return;
handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
}
/// Creates a new internal OpenGL resource and stores the handle
void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
bool separable_program = false, bool hint_retrievable = false);
/// Deletes the internal OpenGL resource
void Release();

File diff suppressed because it is too large Load diff

View file

@ -5,157 +5,93 @@
#pragma once
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <string>
#include <tuple>
#include <filesystem>
#include <stop_token>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "common/thread_worker.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace VideoCommon::Shader {
class AsyncShaders;
}
namespace OpenGL {
class Device;
class ProgramManager;
class RasterizerOpenGL;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ProgramHandle {
OGLProgram source_program;
OGLAssemblyProgram assembly_program;
};
using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
struct PrecompiledShader {
ProgramSharedPtr program;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
};
struct ShaderParameters {
Tegra::GPU& gpu;
Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr;
const u8* host_ptr;
u64 unique_identifier;
};
ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
bool hint_retrievable = false);
class Shader final {
class ShaderCache : public VideoCommon::ShaderCache {
public:
~Shader();
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
bool IsBuilt() const;
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
const VideoCommon::Shader::Registry& GetRegistry() const {
return *registry;
}
/// Mark a OpenGL shader as built
void AsyncOpenGLBuilt(OGLProgram new_program);
/// Mark a GLASM shader as built
void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
static std::unique_ptr<Shader> CreateStageFromMemory(
const ShaderParameters& params, Maxwell::ShaderProgram program_type,
ProgramCode program_code, ProgramCode program_code_b,
VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code);
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader);
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
private:
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
ProgramSharedPtr program, bool is_built_ = true);
GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
ProgramSharedPtr program;
GLuint handle = 0;
bool is_built{};
};
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_);
~ShaderCacheOpenGL() override;
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
/// Loads disk cache for the current game
void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel);
/// Gets the current specified shader stage program
Shader* GetStageProgram(Maxwell::ShaderProgram program,
VideoCommon::Shader::AsyncShaders& async_shaders);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
/// Gets a compute kernel in the passed address
Shader* GetComputeKernel(GPUVAddr code_addr);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
Shader::Environment& env);
private:
ProgramSharedPtr GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
ProgramManager& program_manager;
StateTracker& state_tracker;
VideoCore::ShaderNotify& shader_notify;
const bool use_asynchronous_shaders;
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
GraphicsPipelineKey graphics_key{};
GraphicsPipeline* current_pipeline{};
std::unique_ptr<Shader> null_shader;
std::unique_ptr<Shader> null_kernel;
ShaderContext::ShaderPools main_pools;
std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
Shader::Profile profile;
Shader::HostTranslateInfo host_info;
std::filesystem::path shader_cache_filename;
std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL

View file

@ -0,0 +1,33 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/frontend/emu_window.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
namespace OpenGL::ShaderContext {
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
struct Context {
explicit Context(Core::Frontend::EmuWindow& emu_window)
: gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
Core::Frontend::GraphicsContext::Scoped scoped;
ShaderPools pools;
};
} // namespace OpenGL::ShaderContext
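// Minimal usage sketch (editor's note, not part of the diff; the StatefulThreadWorker
// constructor arguments shown are an assumption): each shader-building worker thread keeps one
// Context so it owns a shared GL context plus its own object pools, roughly:
//   using ShaderWorker = Common::StatefulThreadWorker<OpenGL::ShaderContext::Context>;
//   auto workers = std::make_unique<ShaderWorker>(
//       num_threads, "GlShaderBuilder",
//       [&emu_window] { return OpenGL::ShaderContext::Context{emu_window}; });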

File diff suppressed because it is too large Load diff

View file

@ -1,69 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
using ImageEntry = VideoCommon::Shader::ImageEntry;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
: ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
u32 GetIndex() const {
return index;
}
private:
u32 index = 0;
};
struct GlobalMemoryEntry {
constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
bool is_written_)
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
is_written_} {}
u32 cbuf_index = 0;
u32 cbuf_offset = 0;
bool is_read = false;
bool is_written = false;
};
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
u32 enabled_uniform_buffers{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage);
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier,
std::string_view suffix = {});
} // namespace OpenGL

View file

@ -1,482 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/fs/file.h"
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "common/settings.h"
#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/k_process.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::BindlessSamplerMap;
using VideoCommon::Shader::BoundSamplerMap;
using VideoCommon::Shader::KeyMap;
using VideoCommon::Shader::SeparateSamplerKey;
using ShaderCacheVersionHash = std::array<u8, 64>;
struct ConstBufferKey {
u32 cbuf = 0;
u32 offset = 0;
u32 value = 0;
};
struct BoundSamplerEntry {
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct SeparateSamplerEntry {
u32 cbuf1 = 0;
u32 cbuf2 = 0;
u32 offset1 = 0;
u32 offset2 = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerEntry {
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
namespace {
constexpr u32 NativeVersion = 21;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
std::memcpy(hash.data(), Common::g_shader_cache_version, length);
return hash;
}
} // Anonymous namespace
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
if (!file.ReadObject(type)) {
return false;
}
u32 code_size;
u32 code_size_b;
if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
return false;
}
code.resize(code_size);
code_b.resize(code_size_b);
if (file.Read(code) != code_size) {
return false;
}
if (HasProgramA() && file.Read(code_b) != code_size_b) {
return false;
}
u8 is_texture_handler_size_known;
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
u32 num_separate_samplers;
u32 num_bindless_samplers;
if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
!file.ReadObject(is_texture_handler_size_known) ||
!file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
!file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
!file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
!file.ReadObject(num_bindless_samplers)) {
return false;
}
if (is_texture_handler_size_known) {
texture_handler_size = texture_handler_size_value;
}
std::vector<ConstBufferKey> flat_keys(num_keys);
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
if (file.Read(flat_keys) != flat_keys.size() ||
file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
return false;
}
for (const auto& entry : flat_keys) {
keys.insert({{entry.cbuf, entry.offset}, entry.value});
}
for (const auto& entry : flat_bound_samplers) {
bound_samplers.emplace(entry.offset, entry.sampler);
}
for (const auto& entry : flat_separate_samplers) {
SeparateSamplerKey key;
key.buffers = {entry.cbuf1, entry.cbuf2};
key.offsets = {entry.offset1, entry.offset2};
separate_samplers.emplace(key, entry.sampler);
}
for (const auto& entry : flat_bindless_samplers) {
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
}
return true;
}
bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
if (!file.WriteObject(static_cast<u32>(type)) ||
!file.WriteObject(static_cast<u32>(code.size())) ||
!file.WriteObject(static_cast<u32>(code_b.size()))) {
return false;
}
if (file.Write(code) != code.size()) {
return false;
}
if (HasProgramA() && file.Write(code_b) != code_b.size()) {
return false;
}
if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
!file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
!file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
!file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
!file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
!file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
!file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
return false;
}
std::vector<ConstBufferKey> flat_keys;
flat_keys.reserve(keys.size());
for (const auto& [address, value] : keys) {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
std::vector<BoundSamplerEntry> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
}
std::vector<SeparateSamplerEntry> flat_separate_samplers;
flat_separate_samplers.reserve(separate_samplers.size());
for (const auto& [key, sampler] : separate_samplers) {
SeparateSamplerEntry entry;
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
std::tie(entry.offset1, entry.offset2) = key.offsets;
entry.sampler = sampler;
flat_separate_samplers.push_back(entry);
}
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
BindlessSamplerEntry{address.first, address.second, sampler});
}
return file.Write(flat_keys) == flat_keys.size() &&
file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
title_id = title_id_;
}
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = title_id != 0;
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
return std::nullopt;
}
Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
Common::FS::FileType::BinaryFile};
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
return std::nullopt;
}
u32 version{};
if (!file.ReadObject(version)) {
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
return std::nullopt;
}
if (version < NativeVersion) {
LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
file.Close();
InvalidateTransferable();
is_usable = true;
return std::nullopt;
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator, skipping");
return std::nullopt;
}
// Version is valid, load the shaders
std::vector<ShaderDiskCacheEntry> entries;
while (static_cast<u64>(file.Tell()) < file.GetSize()) {
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
return std::nullopt;
}
}
is_usable = true;
return {std::move(entries)};
}
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
Common::FS::FileType::BinaryFile};
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
}
if (const auto result = LoadPrecompiledFile(file)) {
return *result;
}
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
file.Close();
InvalidatePrecompiled();
return {};
}
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
Common::FS::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
if (file.Read(compressed) != file.GetSize()) {
return std::nullopt;
}
const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
precompiled_cache_virtual_file_offset = 0;
ShaderCacheVersionHash file_hash{};
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
precompiled_cache_virtual_file_offset = 0;
return std::nullopt;
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
precompiled_cache_virtual_file_offset = 0;
return std::nullopt;
}
std::vector<ShaderDiskCachePrecompiled> entries;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
u32 binary_size;
auto& entry = entries.emplace_back();
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
return std::nullopt;
}
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
return std::nullopt;
}
}
return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
if (!Common::FS::RemoveFile(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
Common::FS::PathToUTF8String(GetTransferablePath()));
}
InvalidatePrecompiled();
}
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
// Clear virtual precompiled cache file
precompiled_cache_virtual_file.Resize(0);
if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
Common::FS::PathToUTF8String(GetPrecompiledPath()));
}
}
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
if (!is_usable) {
return;
}
const u64 id = entry.unique_identifier;
if (stored_transferable.contains(id)) {
// The shader already exists
return;
}
Common::FS::IOFile file = AppendTransferableFile();
if (!file.IsOpen()) {
return;
}
if (!entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
}
stored_transferable.insert(id);
}
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
if (!is_usable) {
return;
}
// TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
// when writing the dump. This should be done the moment I get access to write to the virtual
// file.
if (precompiled_cache_virtual_file.GetSize() == 0) {
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
GLint binary_length;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format;
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
unique_identifier);
InvalidatePrecompiled();
}
}
Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
if (!EnsureDirectories()) {
return {};
}
const auto transferable_path{GetTransferablePath()};
const bool existed = Common::FS::Exists(transferable_path);
Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
Common::FS::FileType::BinaryFile};
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
Common::FS::PathToUTF8String(transferable_path));
return {};
}
if (!existed || file.GetSize() == 0) {
// If the file didn't exist, write its version
if (!file.WriteObject(NativeVersion)) {
LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
Common::FS::PathToUTF8String(transferable_path));
return {};
}
}
return file;
}
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
const auto hash{GetShaderCacheVersionHash()};
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
LOG_ERROR(
Render_OpenGL,
"Failed to write precompiled cache version hash to virtual precompiled cache file");
}
}
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
precompiled_cache_virtual_file_offset = 0;
const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
const std::vector<u8> compressed =
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path = GetPrecompiledPath();
Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
Common::FS::FileType::BinaryFile};
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
Common::FS::PathToUTF8String(precompiled_path));
return;
}
if (file.Write(compressed) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
Common::FS::PathToUTF8String(precompiled_path));
}
}
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
const auto CreateDir = [](const std::filesystem::path& dir) {
if (!Common::FS::CreateDir(dir)) {
LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
Common::FS::PathToUTF8String(dir));
return false;
}
return true;
};
return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
CreateDir(GetPrecompiledDir());
}
std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
}
std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
}
std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
return GetBaseDir() / "transferable";
}
std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
return GetBaseDir() / "precompiled";
}
std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
return fmt::format("{:016X}", title_id);
}
} // namespace OpenGL

View file

@ -1,176 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <filesystem>
#include <optional>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
namespace Common::FS {
class IOFile;
}
namespace OpenGL {
using ProgramCode = std::vector<u64>;
/// Describes a shader and how it's used by the guest GPU
struct ShaderDiskCacheEntry {
ShaderDiskCacheEntry();
~ShaderDiskCacheEntry();
bool Load(Common::FS::IOFile& file);
bool Save(Common::FS::IOFile& file) const;
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
}
Tegra::Engines::ShaderType type{};
ProgramCode code;
ProgramCode code_b;
u64 unique_identifier = 0;
std::optional<u32> texture_handler_size;
u32 bound_buffer = 0;
VideoCommon::Shader::GraphicsInfo graphics_info;
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
/// Contains an OpenGL dumped binary program
struct ShaderDiskCachePrecompiled {
u64 unique_identifier = 0;
GLenum binary_format = 0;
std::vector<u8> binary;
};
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL();
/// Binds a title ID for all future operations.
void BindTitleID(u64 title_id);
/// Loads the transferable cache. If the file has an old version or on failure, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
/// Removes the precompiled cache file and clears virtual precompiled cache file.
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveEntry(const ShaderDiskCacheEntry& entry);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SavePrecompiled(u64 unique_identifier, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the precompiled cache file. Returns empty on failure.
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
Common::FS::IOFile& file);
/// Opens the current game's transferable file and writes its header if it doesn't exist
Common::FS::IOFile AppendTransferableFile() const;
/// Saves the precompiled header to the virtual precompiled cache file
void SavePrecompiledHeaderToVirtualPrecompiledCache();
/// Create shader disk cache directories. Returns true on success.
bool EnsureDirectories() const;
/// Gets current game's transferable file path
std::filesystem::path GetTransferablePath() const;
/// Gets current game's precompiled file path
std::filesystem::path GetPrecompiledPath() const;
/// Get user's transferable directory path
std::filesystem::path GetTransferableDir() const;
/// Get user's precompiled directory path
std::filesystem::path GetPrecompiledDir() const;
/// Get user's shader directory path
std::filesystem::path GetBaseDir() const;
/// Get current game's title id
std::string GetTitleID() const;
template <typename T>
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
data, length, precompiled_cache_virtual_file_offset);
precompiled_cache_virtual_file_offset += write_length;
return write_length == sizeof(T) * length;
}
template <typename T>
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
data, length, precompiled_cache_virtual_file_offset);
precompiled_cache_virtual_file_offset += read_length;
return read_length == sizeof(T) * length;
}
template <typename T>
bool SaveObjectToPrecompiled(const T& object) {
return SaveArrayToPrecompiled(&object, 1);
}
bool SaveObjectToPrecompiled(bool object) {
const auto value = static_cast<u8>(object);
return SaveArrayToPrecompiled(&value, 1);
}
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
// Stores the whole precompiled cache which will be read from or saved to the precompiled cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
std::unordered_set<u64> stored_transferable;
/// Title ID to operate on
u64 title_id = 0;
// The cache has been loaded at boot
bool is_usable = false;
};
} // namespace OpenGL

View file

@ -1,149 +1,3 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL {
namespace {
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
if (current == old) {
return;
}
if (current == 0) {
if (enabled) {
enabled = false;
glDisable(stage);
}
return;
}
if (!enabled) {
enabled = true;
glEnable(stage);
}
glBindProgramARB(stage, current);
}
} // Anonymous namespace
ProgramManager::ProgramManager(const Device& device)
: use_assembly_programs{device.UseAssemblyShaders()} {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
} else {
graphics_pipeline.Create();
glBindProgramPipeline(graphics_pipeline.handle);
}
}
ProgramManager::~ProgramManager() = default;
void ProgramManager::BindCompute(GLuint program) {
if (use_assembly_programs) {
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
} else {
is_graphics_bound = false;
glUseProgram(program);
}
}
void ProgramManager::BindGraphicsPipeline() {
if (!use_assembly_programs) {
UpdateSourcePrograms();
}
}
void ProgramManager::BindHostPipeline(GLuint pipeline) {
if (use_assembly_programs) {
if (geometry_enabled) {
geometry_enabled = false;
old_state.geometry = 0;
glDisable(GL_GEOMETRY_PROGRAM_NV);
}
} else {
if (!is_graphics_bound) {
glUseProgram(0);
}
}
glBindProgramPipeline(pipeline);
}
void ProgramManager::RestoreGuestPipeline() {
if (use_assembly_programs) {
glBindProgramPipeline(0);
} else {
glBindProgramPipeline(graphics_pipeline.handle);
}
}
void ProgramManager::BindHostCompute(GLuint program) {
if (use_assembly_programs) {
glDisable(GL_COMPUTE_PROGRAM_NV);
}
glUseProgram(program);
is_graphics_bound = false;
}
void ProgramManager::RestoreGuestCompute() {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
glUseProgram(0);
}
}
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
}
current_state.vertex = program;
}
void ProgramManager::UseGeometryShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
}
current_state.geometry = program;
}
void ProgramManager::UseFragmentShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
}
current_state.fragment = program;
}
void ProgramManager::UpdateSourcePrograms() {
if (!is_graphics_bound) {
is_graphics_bound = true;
glUseProgram(0);
}
const GLuint handle = graphics_pipeline.handle;
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
if (current == old) {
return;
}
glUseProgramStages(handle, stage, current);
};
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
old_state = current_state;
}
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL

View file

@ -4,79 +4,142 @@
#pragma once
#include <cstddef>
#include <array>
#include <span>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
class Device;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
struct alignas(16) MaxwellUniformData {
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
GLfloat y_direction;
};
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
class ProgramManager {
public:
explicit ProgramManager(const Device& device);
~ProgramManager();
static constexpr size_t NUM_STAGES = 5;
/// Binds a compute program
void BindCompute(GLuint program);
/// Updates bound programs.
void BindGraphicsPipeline();
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
void BindHostPipeline(GLuint pipeline);
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
void BindHostCompute(GLuint program);
/// Rewinds BindHostCompute state changes.
void RestoreGuestCompute();
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);
private:
struct PipelineState {
GLuint vertex = 0;
GLuint geometry = 0;
GLuint fragment = 0;
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
/// Update GLSL programs.
void UpdateSourcePrograms();
public:
explicit ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
}
OGLPipeline graphics_pipeline;
void BindComputeProgram(GLuint program) {
glUseProgram(program);
is_compute_bound = true;
}
PipelineState current_state;
PipelineState old_state;
void BindComputeAssemblyProgram(GLuint program) {
if (current_assembly_compute_program != program) {
current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
}
UnbindPipeline();
}
bool use_assembly_programs = false;
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
static constexpr std::array<GLenum, 5> stage_enums{
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
};
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
}
}
BindPipeline();
}
bool is_graphics_bound = true;
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
if (current_programs[0] != vertex) {
current_programs[0] = vertex;
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
}
if (current_programs[4] != fragment) {
current_programs[4] = fragment;
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
}
glUseProgramStages(
pipeline.handle,
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
current_programs[1] = 0;
current_programs[2] = 0;
current_programs[3] = 0;
bool vertex_enabled = false;
bool geometry_enabled = false;
bool fragment_enabled = false;
if (current_stage_mask != 0) {
current_stage_mask = 0;
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
glDisable(program_type);
}
}
BindPipeline();
}
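// stage_mask carries one bit per pipeline stage, in ASSEMBLY_PROGRAM_ENUMS order
// (bit 0 = vertex ... bit 4 = fragment); only bits that changed since the last call
// toggle glEnable/glDisable of the matching NV assembly program target.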
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask) {
const u32 changed_mask = current_stage_mask ^ stage_mask;
current_stage_mask = stage_mask;
if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (((changed_mask >> stage) & 1) != 0) {
if (((stage_mask >> stage) & 1) != 0) {
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
} else {
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
}
}
}
}
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
}
}
UnbindPipeline();
}
void RestoreGuestCompute() {}
private:
void BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
glBindProgramPipeline(pipeline.handle);
}
UnbindCompute();
}
void UnbindPipeline() {
if (is_pipeline_bound) {
is_pipeline_bound = false;
glBindProgramPipeline(0);
}
UnbindCompute();
}
void UnbindCompute() {
if (is_compute_bound) {
is_compute_bound = false;
glUseProgram(0);
}
}
OGLPipeline pipeline;
bool is_pipeline_bound{};
bool is_compute_bound{};
u32 current_stage_mask = 0;
std::array<GLuint, NUM_STAGES> current_programs{};
GLuint current_assembly_compute_program = 0;
};
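// Illustrative call pattern (caller-side, variable names assumed): graphics draws go
// through the separable pipeline, while compute dispatches use a regular program, which
// takes precedence over the bound pipeline object:
//   program_manager.BindSourcePrograms(stage_programs); // std::array<OGLProgram, NUM_STAGES>
//   glDrawArrays(GL_TRIANGLES, 0, 3);
//   program_manager.BindComputeProgram(compute_program.handle);
//   glDispatchCompute(1, 1, 1);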
} // namespace OpenGL

View file

@ -5,57 +5,108 @@
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL::GLShader {
namespace OpenGL {
namespace {
std::string_view StageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
return "vertex";
case GL_GEOMETRY_SHADER:
return "geometry";
case GL_FRAGMENT_SHADER:
return "fragment";
case GL_COMPUTE_SHADER:
return "compute";
static OGLProgram LinkSeparableProgram(GLuint shader) {
OGLProgram program;
program.handle = glCreateProgram();
glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glAttachShader(program.handle, shader);
glLinkProgram(program.handle);
if (!Settings::values.renderer_debug) {
return program;
}
UNIMPLEMENTED();
return "unknown";
GLint link_status{};
glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
GLint log_length{};
glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return program;
}
std::string log(log_length, 0);
glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
if (link_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
return program;
}
} // Anonymous namespace
static void LogShader(GLuint shader, std::string_view code = {}) {
GLint shader_status{};
glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "Failed to build shader");
}
GLint log_length{};
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetShaderInfoLog(shader, log_length, nullptr, log.data());
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
if (!code.empty()) {
LOG_INFO(Render_OpenGL, "\n{}", code);
}
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
}
GLuint LoadShader(std::string_view source, GLenum type) {
const std::string_view debug_type = StageDebugName(type);
const GLuint shader_id = glCreateShader(type);
OGLProgram CreateProgram(std::string_view code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
const GLchar* source_string = source.data();
const GLint source_length = static_cast<GLint>(source.size());
const GLint length = static_cast<GLint>(code.size());
const GLchar* const code_ptr = code.data();
glShaderSource(shader.handle, 1, &code_ptr, &length);
glCompileShader(shader.handle);
if (Settings::values.renderer_debug) {
LogShader(shader.handle, code);
}
return LinkSeparableProgram(shader.handle);
}
glShaderSource(shader_id, 1, &source_string, &source_length);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
GLint result = GL_FALSE;
GLint info_log_length;
glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
static_cast<GLsizei>(code.size_bytes()));
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
if (Settings::values.renderer_debug) {
LogShader(shader.handle);
}
return LinkSeparableProgram(shader.handle);
}
if (info_log_length > 1) {
std::string shader_error(info_log_length, ' ');
glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "{}", shader_error);
} else {
LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
OGLAssemblyProgram program;
glGenProgramsARB(1, &program.handle);
glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
static_cast<GLsizei>(code.size()), code.data());
if (Settings::values.renderer_debug) {
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
if (err && *err) {
if (std::strstr(err, "error")) {
LOG_CRITICAL(Render_OpenGL, "\n{}", err);
LOG_INFO(Render_OpenGL, "\n{}", code);
} else {
LOG_WARNING(Render_OpenGL, "\n{}", err);
}
}
}
return shader_id;
return program;
}
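// Illustrative usage (source variables assumed): building separable programs from GLSL
// or SPIR-V, and an assembly program for the NV_gpu_program5 path:
//   OGLProgram vert = CreateProgram(vertex_glsl_source, GL_VERTEX_SHADER);
//   OGLProgram comp = CreateProgram(spirv_words, GL_COMPUTE_SHADER); // std::span<const u32>
//   OGLAssemblyProgram frag = CompileProgram(nv_asm_source, GL_FRAGMENT_PROGRAM_NV);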
} // namespace OpenGL::GLShader
} // namespace OpenGL

View file

@ -4,92 +4,23 @@
#pragma once
#include <span>
#include <string>
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL::GLShader {
namespace OpenGL {
/**
* Utility function to log the source code of a list of shaders.
* @param shaders The OpenGL shaders whose source we will print.
*/
template <typename... T>
void LogShaderSource(T... shaders) {
auto shader_list = {shaders...};
OGLProgram CreateProgram(std::string_view code, GLenum stage);
for (const auto& shader : shader_list) {
if (shader == 0)
continue;
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
GLint source_length;
glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
std::string source(source_length, ' ');
glGetShaderSource(shader, source_length, nullptr, &source[0]);
LOG_INFO(Render_OpenGL, "Shader source {}", source);
}
}
/**
* Utility function to create and compile an OpenGL GLSL shader
* @param source String of the GLSL shader program
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
*/
GLuint LoadShader(std::string_view source, GLenum type);
/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
* @param separable_program whether to create a separable program
* @param shaders ID of shaders to attach to the program
* @returns Handle of the newly created OpenGL program object
*/
template <typename... T>
GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
// Link the program
LOG_DEBUG(Render_OpenGL, "Linking program...");
GLuint program_id = glCreateProgram();
((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
if (separable_program) {
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
}
if (hint_retrievable) {
glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
}
glLinkProgram(program_id);
// Check the program
GLint result = GL_FALSE;
GLint info_log_length;
glGetProgramiv(program_id, GL_LINK_STATUS, &result);
glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
if (info_log_length > 1) {
std::string program_error(info_log_length, ' ');
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "{}", program_error);
} else {
LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
}
}
if (result == GL_FALSE) {
// There was a problem linking the shader, print the source for debugging purposes.
LogShaderSource(shaders...);
}
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
return program_id;
}
} // namespace OpenGL::GLShader
} // namespace OpenGL

View file

@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyShaders(Tables& tables) {
FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
Shaders);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);

View file

@ -52,7 +52,6 @@ enum : u8 {
BlendState0,
BlendState7 = BlendState0 + 7,
Shaders,
ClipDistances,
PolygonModes,

View file

@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
namespace {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@ -59,107 +57,6 @@ struct CopyRegion {
GLsizei depth;
};
struct FormatTuple {
GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
constexpr std::array ACCELERATED_FORMATS{
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
switch (info.type) {
case ImageType::e1D:
@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
return GL_NONE;
}
GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
const bool is_multisampled = num_samples > 1;
switch (type) {
case ImageViewType::e1D:
case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
case ImageViewType::e2D:
case Shader::TextureType::Color2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
case ImageViewType::Cube:
case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
case ImageViewType::e3D:
case Shader::TextureType::Color3D:
return GL_TEXTURE_3D;
case ImageViewType::e1DArray:
case Shader::TextureType::ColorArray1D:
return GL_TEXTURE_1D_ARRAY;
case ImageViewType::e2DArray:
case Shader::TextureType::ColorArray2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
case ImageViewType::CubeArray:
case Shader::TextureType::ColorArrayCube:
return GL_TEXTURE_CUBE_MAP_ARRAY;
case ImageViewType::Rect:
return GL_TEXTURE_RECTANGLE;
case ImageViewType::Buffer:
case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
UNREACHABLE_MSG("Invalid image view type={}", type);
@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
default:
return false;
}
const GLenum internal_format = GetFormatTuple(info.format).internal_format;
const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
if (format_info.is_compressed) {
return false;
@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
const GLuint texture = image_view->DefaultHandle();
glNamedFramebufferTexture(fbo, attachment, texture, 0);
glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
return;
}
const GLuint texture = image_view->Handle(ImageViewType::e3D);
const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
if (image_view->range.extent.layers > 1) {
// TODO: OpenGL doesn't support rendering to a fixed number of slices
glNamedFramebufferTexture(fbo, attachment, texture, 0);
@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
switch (format) {
case Shader::ImageFormat::Typeless:
break;
case Shader::ImageFormat::R8_SINT:
return GL_R8I;
case Shader::ImageFormat::R8_UINT:
return GL_R8UI;
case Shader::ImageFormat::R16_UINT:
return GL_R16UI;
case Shader::ImageFormat::R16_SINT:
return GL_R16I;
case Shader::ImageFormat::R32_UINT:
return GL_R32UI;
case Shader::ImageFormat::R32G32_UINT:
return GL_RG32UI;
case Shader::ImageFormat::R32G32B32A32_UINT:
return GL_RGBA32UI;
}
UNREACHABLE_MSG("Invalid image format={}", format);
return GL_R32UI;
}
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
for (const FormatTuple& tuple : FORMAT_TABLE) {
for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
const GLenum format = tuple.internal_format;
GLint compat_class;
GLint compat_type;
@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
null_image_3d.Create(GL_TEXTURE_3D);
null_image_rect.Create(GL_TEXTURE_RECTANGLE);
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
std::array<GLuint, 4> new_handles;
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
GL_R8, 0, 1, 0, 6);
const std::array texture_handles{
null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
null_image_view_2d_array.handle, null_image_view_cube.handle,
null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
null_image_view_cube.handle,
};
for (const GLuint handle : texture_handles) {
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
}
const auto set_view = [this](ImageViewType type, GLuint handle) {
const auto set_view = [this](Shader::TextureType type, GLuint handle) {
if (device.HasDebuggingToolAttached()) {
const std::string name = fmt::format("NullImage {}", type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
null_image_views[static_cast<size_t>(type)] = handle;
};
set_view(ImageViewType::e1D, null_image_view_1d.handle);
set_view(ImageViewType::e2D, null_image_view_2d.handle);
set_view(ImageViewType::Cube, null_image_view_cube.handle);
set_view(ImageViewType::e3D, null_image_3d.handle);
set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
set_view(ImageViewType::Rect, null_image_rect.handle);
set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
set_view(Shader::TextureType::Color3D, null_image_3d.handle);
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
const auto& tuple = GetFormatTuple(info.format);
const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
gl_format = tuple.format;
gl_type = tuple.type;
@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
buffer.Create();
glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
}
void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
copy.dst_offset, copy.size);
}
}
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
internal_format = GetFormatTuple(format).internal_format;
internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
}
full_range = info.range;
flat_range = info.range;
set_object_label = device.HasDebuggingToolAttached();
is_render_target = info.IsRenderTarget();
original_texture = image.texture.handle;
num_samples = image.info.num_samples;
if (!is_render_target) {
swizzle[0] = info.x_source;
swizzle[1] = info.y_source;
swizzle[2] = info.z_source;
swizzle[3] = info.w_source;
}
VideoCommon::SubresourceRange flatten_range = info.range;
std::array<GLuint, 2> handles;
stored_views.reserve(2);
switch (info.type) {
case ImageViewType::e1DArray:
flatten_range.extent.layers = 1;
flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e1D:
glGenTextures(2, handles.data());
SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
SetupView(Shader::TextureType::Color1D);
SetupView(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2DArray:
flatten_range.extent.layers = 1;
flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e2D:
if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.base = {.level = info.range.base.level, .layer = 0},
.extent = {.levels = 1, .layers = 1},
};
glGenTextures(1, handles.data());
SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
break;
full_range = slice_range;
SetupView(Shader::TextureType::Color3D);
} else {
SetupView(Shader::TextureType::Color2D);
SetupView(Shader::TextureType::ColorArray2D);
}
glGenTextures(2, handles.data());
SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
break;
case ImageViewType::e3D:
glGenTextures(1, handles.data());
SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
SetupView(Shader::TextureType::Color3D);
break;
case ImageViewType::CubeArray:
flatten_range.extent.layers = 6;
flat_range.extent.layers = 6;
[[fallthrough]];
case ImageViewType::Cube:
glGenTextures(2, handles.data());
SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
SetupView(Shader::TextureType::ColorCube);
SetupView(Shader::TextureType::ColorArrayCube);
break;
case ImageViewType::Rect:
glGenTextures(1, handles.data());
SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
UNIMPLEMENTED();
break;
case ImageViewType::Buffer:
glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
UNREACHABLE();
break;
}
switch (info.type) {
case ImageViewType::e1D:
default_handle = Handle(Shader::TextureType::Color1D);
break;
case ImageViewType::e1DArray:
default_handle = Handle(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2D:
default_handle = Handle(Shader::TextureType::Color2D);
break;
case ImageViewType::e2DArray:
default_handle = Handle(Shader::TextureType::ColorArray2D);
break;
case ImageViewType::e3D:
default_handle = Handle(Shader::TextureType::Color3D);
break;
case ImageViewType::Cube:
default_handle = Handle(Shader::TextureType::ColorCube);
break;
case ImageViewType::CubeArray:
default_handle = Handle(Shader::TextureType::ColorArrayCube);
break;
default:
break;
}
default_handle = Handle(info.type);
}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
: VideoCommon::ImageViewBase{info, view_info} {}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
GLuint handle, const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range) {
if (info.type == ImageViewType::Buffer) {
// TODO: Take offset from buffer cache
glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
image.guest_size_bytes);
} else {
const GLuint parent = image.texture.handle;
const GLenum target = ImageTarget(view_type, image.info.num_samples);
glTextureView(handle, target, parent, internal_format, view_range.base.level,
view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
if (!info.IsRenderTarget()) {
ApplySwizzle(handle, format, info.Swizzle());
}
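// Returns a storage image view for the requested texture type, creating it on first use.
// Views are cached per (texture type, signedness); Typeless formats reuse the regular
// sampled view from Handle().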
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
if (image_format == Shader::ImageFormat::Typeless) {
return Handle(texture_type);
}
if (device.HasDebuggingToolAttached()) {
const std::string name = VideoCommon::Name(*this, view_type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
image_format == Shader::ImageFormat::R16_SINT};
if (!storage_views) {
storage_views = std::make_unique<StorageViews>();
}
stored_views.emplace_back().handle = handle;
views[static_cast<size_t>(view_type)] = handle;
auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
GLuint& view{type_views[static_cast<size_t>(texture_type)]};
if (view == 0) {
view = MakeView(texture_type, ShaderFormat(image_format));
}
return view;
}
void ImageView::SetupView(Shader::TextureType view_type) {
views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
}
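// Creates a view over the original texture via glTextureView: the returned handle aliases
// the parent's storage for the selected subresource range, reinterpreted as view_format.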
GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
VideoCommon::SubresourceRange view_range;
switch (view_type) {
case Shader::TextureType::Color1D:
case Shader::TextureType::Color2D:
case Shader::TextureType::ColorCube:
view_range = flat_range;
break;
case Shader::TextureType::ColorArray1D:
case Shader::TextureType::ColorArray2D:
case Shader::TextureType::Color3D:
case Shader::TextureType::ColorArrayCube:
view_range = full_range;
break;
default:
UNREACHABLE();
}
OGLTextureView& view = stored_views.emplace_back();
view.Create();
const GLenum target = ImageTarget(view_type, num_samples);
glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
if (!is_render_target) {
std::array<SwizzleSource, 4> casted_swizzle;
std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
return static_cast<SwizzleSource>(component_swizzle);
});
ApplySwizzle(view.handle, format, casted_swizzle);
}
if (set_object_label) {
const std::string name = VideoCommon::Name(*this);
glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
}
return view.handle;
}
Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {

View file

@ -9,6 +9,7 @@
#include <glad/glad.h>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@ -127,13 +128,12 @@ private:
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};
class Image : public VideoCommon::ImageBase {
@ -154,8 +154,6 @@ public:
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@ -170,7 +168,6 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
OGLBuffer buffer;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
return views[static_cast<size_t>(query_type)];
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
@ -196,15 +200,38 @@ public:
return internal_format;
}
private:
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range);
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
return gpu_addr;
}
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
[[nodiscard]] u32 BufferSize() const noexcept {
return buffer_size;
}
private:
struct StorageViews {
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
};
void SetupView(Shader::TextureType view_type);
GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
GLuint default_handle = 0;
std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
GLuint default_handle = 0;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
GLuint original_texture = 0;
int num_samples = 0;
VideoCommon::SubresourceRange flat_range;
VideoCommon::SubresourceRange full_range;
std::array<u8, 4> swizzle{};
bool set_object_label = false;
bool is_render_target = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};

View file

@ -5,12 +5,120 @@
#pragma once
#include <glad/glad.h>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct FormatTuple {
GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
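// Illustrative usage (caller variables assumed, uncompressed formats only): allocating
// and uploading a guest image with the translated format triple:
//   const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
//   glTextureStorage2D(texture.handle, num_levels, tuple.internal_format, width, height);
//   glTextureSubImage2D(texture.handle, 0, 0, 0, width, height, tuple.format, tuple.type, data);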
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:

View file

@ -25,6 +25,7 @@
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
}
AddTelemetryFields();
InitOpenGLObjects();
// Initialize default attributes to match hardware's disabled attributes
GLint max_attribs{};
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
}
// Enable seamless cubemaps when per texture parameters are not available
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
}
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
&vertex_buffer_address);
}
}
RendererOpenGL::~RendererOpenGL() = default;
@ -230,18 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
void RendererOpenGL::InitOpenGLObjects() {
// Create shader programs
OGLShader vertex_shader;
vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
OGLShader fragment_shader;
fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
pipeline.Create();
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
// Enable seamless cubemaps when per texture parameters are not available
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
}
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
&vertex_buffer_address);
}
}
void RendererOpenGL::AddTelemetryFields() {
@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
std::data(ortho_matrix));
program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
program_manager.BindHostPipeline(pipeline.handle);
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
program_manager.RestoreGuestPipeline();
// TODO
// program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {

View file

@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@ -111,9 +110,8 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
OGLPipeline pipeline;
OGLProgram present_vertex;
OGLProgram present_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer

View file

@ -16,8 +16,8 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
OGLProgram MakeProgram(std::string_view source) {
OGLShader shader;
shader.Create(source, GL_COMPUTE_SHADER);
OGLProgram program;
program.Create(true, false, shader.handle);
return program;
return CreateProgram(source, GL_COMPUTE_SHADER);
}
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
return static_cast<size_t>(copy.extent.width * copy.extent.height *
copy.src_subresource.num_layers);
}
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindHostCompute(astc_decoder_program.handle);
program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindHostCompute(copy_bc4_program.handle);
program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
program_manager.BindHostCompute(copy_bgra_program.handle);
program_manager.BindComputeProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);