video_core: Reimplement the buffer cache
Reimplement the buffer cache using cached bindings and page-level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache.

- Bindings are cached, allowing the cache to skip work when the game changes only a few bits between draws.
- OpenGL assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers; instead, GL_EXT_memory_object is used to alias sub-buffers within the same allocation.
- OpenGL assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table lookup inside the driver compared to glBufferSubData.
- A new OpenGL stream buffer, based on fences, is implemented for drivers other than Nvidia's proprietary one, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (which some games use heavily).
- Most optimizations are now shared between APIs, allowing Vulkan to cache more bindings than before and skip unnecessary work.

This commit adds the necessary infrastructure to use Vulkan objects from OpenGL. Overall, it improves performance and fixes some bugs present in the old cache. There are still some edge cases hit by some games that harm performance on some vendors; these are planned to be fixed in later commits.
Parent: a39d9c5194 · Commit: 82c2601555
67 changed files with 2555 additions and 2648 deletions
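Two of the mechanisms called out in the message can be illustrated briefly before the diff. First, the GL_EXT_memory_object path: the buffer's backing store is allocated through Vulkan, exported, and imported into OpenGL, so views at different offsets alias the same allocation without copying (compare Buffer::CreateMemoryObjects and Buffer::SubBuffer in the diff below). A minimal sketch, assuming POSIX fd external-memory handles (VK_KHR_external_memory_fd / GL_EXT_memory_object_fd) and already-resolved extension entry points; the helper names are illustrative and not part of this commit:

#include <array>
#include <cstddef>
#include <utility>

#include <glad/glad.h>
#include <vulkan/vulkan.h>

// Export the Vulkan allocation as a file descriptor. In real code,
// vkGetMemoryFdKHR is an extension entry point resolved through
// vkGetDeviceProcAddr, and the memory must have been allocated with
// VkExportMemoryAllocateInfo naming the opaque-fd handle type.
int ExportMemoryFd(VkDevice device, VkDeviceMemory memory) {
    const VkMemoryGetFdInfoKHR get_fd_info{
        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .pNext = nullptr,
        .memory = memory,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    };
    int fd = -1;
    vkGetMemoryFdKHR(device, &get_fd_info, &fd); // fd ownership passes to GL on import
    return fd;
}

// Import the fd into OpenGL and alias two buffer views over one allocation:
// one covering the whole range and one starting at 'offset', so a sub-buffer
// can later be bound with offset 0 and no copy.
void AliasSubBuffer(int fd, GLuint64 total_size, GLuint64 offset, GLuint out_buffers[2]) {
    GLuint memory_object = 0;
    glCreateMemoryObjectsEXT(1, &memory_object);
    glImportMemoryFdEXT(memory_object, total_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd);

    glCreateBuffers(2, out_buffers);
    glNamedBufferStorageMemEXT(out_buffers[0], static_cast<GLsizeiptr>(total_size),
                               memory_object, 0);
    glNamedBufferStorageMemEXT(out_buffers[1], static_cast<GLsizeiptr>(total_size - offset),
                               memory_object, offset);
}

Second, the fence-based stream buffer: a persistently mapped buffer is divided into slices, each guarded by a GLsync object, so partial uploads never force the driver to synchronize against in-flight 3D work. A minimal sketch under the same caveats (class name, slice count, and sizes are illustrative; requests larger than one slice are not handled). Its Request(size) returning a pointer/offset pair mirrors the shape consumed by BindMappedUniformBuffer in the header diff below:

class FencedStreamBuffer {
public:
    static constexpr size_t SIZE = 8 * 1024 * 1024;
    static constexpr size_t NUM_SLICES = 4;
    static constexpr size_t SLICE_SIZE = SIZE / NUM_SLICES;

    FencedStreamBuffer() {
        glCreateBuffers(1, &handle);
        glNamedBufferStorage(handle, SIZE, nullptr,
                             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
        map = static_cast<unsigned char*>(glMapNamedBufferRange(
            handle, 0, SIZE, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT));
    }

    // Returns a host pointer to write through and the buffer offset to bind.
    std::pair<unsigned char*, size_t> Request(size_t size) {
        if (offset + size > (slice + 1) * SLICE_SIZE) {
            // Fence the slice we finished writing, then move on to the next one.
            fences[slice] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
            slice = (slice + 1) % NUM_SLICES;
            offset = slice * SLICE_SIZE;
            if (fences[slice] != nullptr) {
                // Wait until the GPU has finished reading the slice we are about to overwrite.
                while (glClientWaitSync(fences[slice], GL_SYNC_FLUSH_COMMANDS_BIT, 1'000'000) ==
                       GL_TIMEOUT_EXPIRED) {
                }
                glDeleteSync(fences[slice]);
                fences[slice] = nullptr;
            }
        }
        const size_t returned_offset = offset;
        offset += size;
        return {map + returned_offset, returned_offset};
    }

private:
    GLuint handle = 0;
    unsigned char* map = nullptr;
    size_t slice = 0;
    size_t offset = 0;
    std::array<GLsync, NUM_SLICES> fences{};
};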
src/video_core/renderer_opengl/gl_buffer_cache.cpp

@@ -2,98 +2,235 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <memory>
#include <span>

#include <glad/glad.h>

#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"

namespace OpenGL {
namespace {
struct BindlessSSBO {
    GLuint64EXT address;
    GLsizei length;
    GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
constexpr std::array PROGRAM_LUT{
    GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
} // Anonymous namespace

MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));

Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}

Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
    : BufferBlock{cpu_addr_, size_} {
    gl_buffer.Create();
    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
    if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
               VAddr cpu_addr_, u64 size_bytes_)
    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
    buffer.Create();
    const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
    glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
    if (runtime.device.UseAssemblyShaders()) {
        CreateMemoryObjects(runtime);
        glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(),
                                   memory_commit.Offset());
    } else {
        glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
    }
    if (runtime.has_unified_vertex_buffers) {
        glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
    }
}

Buffer::~Buffer() = default;

void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
    glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
                         static_cast<GLsizeiptr>(data_size), data);
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
    glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
                         static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}

void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
    const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
    const GLintptr gl_offset = static_cast<GLintptr>(offset);
    if (read_buffer.handle == 0) {
        read_buffer.Create();
        glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
                          GL_STREAM_READ);
    }
    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
    glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
    glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
                            static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}

void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
                      std::size_t copy_size) {
    glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
}

OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
                               const Device& device_, OGLStreamBuffer& stream_buffer_,
                               StateTracker& state_tracker)
    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
    if (!device.HasFastBufferSubData()) {
void Buffer::MakeResident(GLenum access) noexcept {
    // Abuse GLenum's order to exit early
    // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
    if (access <= current_residency_access || buffer.handle == 0) {
        return;
    }
    if (std::exchange(current_residency_access, access) != GL_NONE) {
        // If the buffer is already resident, remove its residency before promoting it
        glMakeNamedBufferNonResidentNV(buffer.handle);
    }
    glMakeNamedBufferResidentNV(buffer.handle, access);
}

    static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
    glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
    for (const GLuint cbuf : cbufs) {
        glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
GLuint Buffer::SubBuffer(u32 offset) {
    if (offset == 0) {
        return buffer.handle;
    }
    for (const auto& [sub_buffer, sub_offset] : subs) {
        if (sub_offset == offset) {
            return sub_buffer.handle;
        }
    }
    OGLBuffer sub_buffer;
    sub_buffer.Create();
    glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset,
                               memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset);
    return subs.emplace_back(std::move(sub_buffer), offset).first.handle;
}

void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) {
    auto& allocator = runtime.vulkan_memory_allocator;
    auto& device = runtime.vulkan_device->GetLogical();
    auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = SizeBytes(),
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
                 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    });
    const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer);
    memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal);
}

BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
                                       Vulkan::MemoryAllocator* vulkan_memory_allocator_)
    : device{device_}, vulkan_device{vulkan_device_},
      vulkan_memory_allocator{vulkan_memory_allocator_},
      stream_buffer{device.HasFastBufferSubData() ? std::nullopt
                                                  : std::make_optional<StreamBuffer>()} {
    GLint gl_max_attributes;
    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
    max_attributes = static_cast<u32>(gl_max_attributes);
    use_assembly_shaders = device.UseAssemblyShaders();
    has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory();

    for (auto& stage_uniforms : fast_uniforms) {
        for (OGLBuffer& buffer : stage_uniforms) {
            buffer.Create();
            glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
        }
    }
}

OGLBufferCache::~OGLBufferCache() {
    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
                                    std::span<const VideoCommon::BufferCopy> copies) {
    for (const VideoCommon::BufferCopy& copy : copies) {
        glCopyNamedBufferSubData(
            src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
            static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
    }
}

std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
    return std::make_shared<Buffer>(device, cpu_addr, size);
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
    if (has_unified_vertex_buffers) {
        buffer.MakeResident(GL_READ_ONLY);
        glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
                               static_cast<GLsizeiptr>(size));
    } else {
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
        index_buffer_offset = offset;
    }
}

OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
    return {0, 0, 0};
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
                                          u32 stride) {
    if (index >= max_attributes) {
        return;
    }
    if (has_unified_vertex_buffers) {
        buffer.MakeResident(GL_READ_ONLY);
        glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
                               buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
    } else {
        glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
                           static_cast<GLsizei>(stride));
    }
}

OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
                                                             std::size_t size) {
    DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
    const GLuint cbuf = cbufs[cbuf_cursor++];
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
                                           u32 offset, u32 size) {
    if (use_assembly_shaders) {
        const GLuint sub_buffer = buffer.SubBuffer(offset);
        glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0,
                            static_cast<GLsizeiptr>(size));
    } else {
        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }
}

    glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
    return {cbuf, 0, 0};
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                  u32 size) {
    if (use_assembly_shaders) {
        glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index,
                            buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size));
    } else {
        glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }
}

void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
                                           u32 offset, u32 size, bool is_written) {
    if (use_assembly_shaders) {
        const BindlessSSBO ssbo{
            .address = buffer.HostGpuAddr() + offset,
            .length = static_cast<GLsizei>(size),
            .padding = 0,
        };
        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
        glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
                                        reinterpret_cast<const GLuint*>(&ssbo));
    } else {
        const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }
}

void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                  u32 size, bool is_written) {
    if (use_assembly_shaders) {
        const BindlessSSBO ssbo{
            .address = buffer.HostGpuAddr() + offset,
            .length = static_cast<GLsizei>(size),
            .padding = 0,
        };
        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
        glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
                                        reinterpret_cast<const GLuint*>(&ssbo));
    } else if (size == 0) {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
    } else {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }
}

void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
                                                     u32 size) {
    glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
                      static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}

} // namespace OpenGL
src/video_core/renderer_opengl/gl_buffer_cache.h

@@ -5,79 +5,167 @@
#pragma once

#include <array>
#include <memory>
#include <span>

#include "common/alignment.h"
#include "common/common_types.h"
#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"

namespace Core {
class System;
}
namespace Vulkan {
class Device;
class MemoryAllocator;
} // namespace Vulkan

namespace OpenGL {

class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class StateTracker;
class BufferCacheRuntime;

class Buffer : public VideoCommon::BufferBlock {
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
    explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
    ~Buffer();
    explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
                    u64 size_bytes);
    explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);

    void Upload(std::size_t offset, std::size_t data_size, const u8* data);
    void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;

    void Download(std::size_t offset, std::size_t data_size, u8* data);
    void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;

    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
                  std::size_t copy_size);
    void MakeResident(GLenum access) noexcept;

    GLuint Handle() const noexcept {
        return gl_buffer.handle;
    [[nodiscard]] GLuint SubBuffer(u32 offset);

    [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
        return address;
    }

    u64 Address() const noexcept {
        return gpu_address;
    [[nodiscard]] GLuint Handle() const noexcept {
        return buffer.handle;
    }

private:
    OGLBuffer gl_buffer;
    OGLBuffer read_buffer;
    u64 gpu_address = 0;
    void CreateMemoryObjects(BufferCacheRuntime& runtime);

    GLuint64EXT address = 0;
    Vulkan::MemoryCommit memory_commit;
    OGLBuffer buffer;
    GLenum current_residency_access = GL_NONE;
    std::vector<std::pair<OGLBuffer, u32>> subs;
};

using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
class BufferCacheRuntime {
    friend Buffer;

public:
    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
                            const Device& device, OGLStreamBuffer& stream_buffer,
                            StateTracker& state_tracker);
    ~OGLBufferCache();
    static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();

    BufferInfo GetEmptyBuffer(std::size_t) override;
    explicit BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
                                Vulkan::MemoryAllocator* vulkan_memory_allocator_);

    void Acquire() noexcept {
        cbuf_cursor = 0;
    void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
                    std::span<const VideoCommon::BufferCopy> copies);

    void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);

    void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);

    void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);

    void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);

    void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
                           bool is_written);

    void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
                                  bool is_written);

    void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);

    void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
        if (use_assembly_shaders) {
            const GLuint handle = fast_uniforms[stage][binding_index].handle;
            const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
            glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
        } else {
            const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
            const GLuint binding = base_binding + binding_index;
            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
                              fast_uniforms[stage][binding_index].handle, 0,
                              static_cast<GLsizeiptr>(size));
        }
    }

protected:
    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
    void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
        if (use_assembly_shaders) {
            glProgramBufferParametersIuivNV(
                PABO_LUT[stage], binding_index, 0,
                static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
                reinterpret_cast<const GLuint*>(data.data()));
        } else {
            glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
                                 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
        }
    }

    BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
    std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
        const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
        return mapped_span;
    }

    [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
        return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
    }

    [[nodiscard]] bool HasFastBufferSubData() const noexcept {
        return device.HasFastBufferSubData();
    }

private:
    static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
                                             Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
    static constexpr std::array PABO_LUT{
        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
    };

    const Device& device;
    const Vulkan::Device* vulkan_device;
    Vulkan::MemoryAllocator* vulkan_memory_allocator;
    std::optional<StreamBuffer> stream_buffer;

    std::size_t cbuf_cursor = 0;
    std::array<GLuint, NUM_CBUFS> cbufs{};
    u32 max_attributes = 0;

    bool use_assembly_shaders = false;
    bool has_unified_vertex_buffers = false;

    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
               VideoCommon::NUM_STAGES>
        fast_uniforms;

    u32 index_buffer_offset = 0;
};

struct BufferCacheParams {
    using Runtime = OpenGL::BufferCacheRuntime;
    using Buffer = OpenGL::Buffer;

    static constexpr bool IS_OPENGL = true;
    static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
    static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
    static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
    static constexpr bool USE_MEMORY_MAPS = false;
};

using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

} // namespace OpenGL
src/video_core/renderer_opengl/gl_device.cpp

@@ -21,9 +21,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"

namespace OpenGL {

namespace {

// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;

@@ -197,11 +195,13 @@ bool IsASTCSupported() {
    const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
    return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}

} // Anonymous namespace

Device::Device()
    : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
Device::Device(bool has_vulkan_instance) {
    if (!GLAD_GL_VERSION_4_6) {
        LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
        throw std::runtime_error{"Insufficient version"};
    }
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
    const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
    const std::vector extensions = GetExtensions();

@@ -217,6 +217,9 @@ Device::Device()
            "Beta driver 443.24 is known to have issues. There might be performance issues.");
        disable_fast_buffer_sub_data = true;
    }

    max_uniform_buffers = BuildMaxUniformBuffers();
    base_bindings = BuildBaseBindings();
    uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
    max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);

@@ -243,7 +246,8 @@ Device::Device()

    use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
                           GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
                           GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
                           GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2 &&
                           has_vulkan_instance;

    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
    use_driver_cache = is_nvidia;
src/video_core/renderer_opengl/gl_device.h

@@ -10,18 +10,16 @@

namespace OpenGL {

static constexpr u32 EmulationUniformBlockBinding = 0;

class Device final {
class Device {
public:
    struct BaseBindings final {
    struct BaseBindings {
        u32 uniform_buffer{};
        u32 shader_storage_buffer{};
        u32 sampler{};
        u32 image{};
    };

    explicit Device();
    explicit Device(bool has_vulkan_instance);
    explicit Device(std::nullptr_t);

    u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
src/video_core/renderer_opengl/gl_fence_manager.cpp

@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {

FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
                                       Tegra::GPU& gpu_, TextureCache& texture_cache_,
                                       OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
                                       BufferCache& buffer_cache_, QueryCache& query_cache_)
    : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}

Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
src/video_core/renderer_opengl/gl_fence_manager.h

@@ -32,14 +32,13 @@ private:
};

using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
    VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;

class FenceManagerOpenGL final : public GenericFenceManager {
public:
    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                                TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
                                QueryCache& query_cache_);
    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
                                TextureCache& texture_cache, BufferCache& buffer_cache,
                                QueryCache& query_cache);

protected:
    Fence CreateFence(u32 value, bool is_stubbed) override;
src/video_core/renderer_opengl/gl_rasterizer.cpp

@@ -44,17 +44,10 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;

MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));

namespace {

@@ -101,20 +94,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}

std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
                               const ConstBufferEntry& entry) {
    if (!entry.IsIndirect()) {
        return entry.GetSize();
    }
    if (buffer.size > Maxwell::MaxConstBufferSize) {
        LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
                    Maxwell::MaxConstBufferSize);
        return Maxwell::MaxConstBufferSize;
    }

    return buffer.size;
}

/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments

@@ -147,14 +126,6 @@ void oglEnable(GLenum cap, bool state) {
    (state ? glEnable : glDisable)(cap);
}

void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
    if (num_ssbos == 0) {
        return;
    }
    glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
                                    reinterpret_cast<const GLuint*>(ssbos));
}

ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
    if (entry.is_buffer) {
        return ImageViewType::Buffer;

@@ -196,49 +167,35 @@ ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {

RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                   Core::Memory::Memory& cpu_memory_, const Device& device_,
                                   const Vulkan::Device* vulkan_device,
                                   Vulkan::MemoryAllocator* vulkan_memory_allocator,
                                   ScreenInfo& screen_info_, ProgramManager& program_manager_,
                                   StateTracker& state_tracker_)
    : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
      kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
      screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
      stream_buffer(device, state_tracker),
      texture_cache_runtime(device, program_manager, state_tracker),
      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
      buffer_cache_runtime(device, vulkan_device, vulkan_memory_allocator),
      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
      shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
      query_cache(*this, maxwell3d, gpu_memory),
      buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
      async_shaders(emu_window_) {
    unified_uniform_buffer.Create();
    glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);

    if (device.UseAssemblyShaders()) {
        glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
        for (const GLuint cbuf : staging_cbufs) {
            glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
                                 nullptr, 0);
        }
    }
    if (device.UseAsynchronousShaders()) {
        async_shaders.AllocateWorkers();
    }
}

RasterizerOpenGL::~RasterizerOpenGL() {
    if (device.UseAssemblyShaders()) {
        glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
    }
}
RasterizerOpenGL::~RasterizerOpenGL() = default;

void RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SyncVertexFormats() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexFormats]) {
        return;
    }
    flags[Dirty::VertexFormats] = false;

    MICROPROFILE_SCOPE(OpenGL_VAO);

    // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
    // the first 16 vertex attributes always, as we don't know which ones are actually used until
    // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to

@@ -274,55 +231,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
    }
}

void RasterizerOpenGL::SetupVertexBuffer() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexBuffers]) {
        return;
    }
    flags[Dirty::VertexBuffers] = false;

    MICROPROFILE_SCOPE(OpenGL_VB);

    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();

    // Upload all guest vertex arrays sequentially to our buffer
    const auto& regs = maxwell3d.regs;
    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
        }
        flags[Dirty::VertexBuffer0 + index] = false;

        const auto& vertex_array = regs.vertex_array[index];
        if (!vertex_array.IsEnabled()) {
            continue;
        }

        const GPUVAddr start = vertex_array.StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
        ASSERT(end >= start);

        const GLuint gl_index = static_cast<GLuint>(index);
        const u64 size = end - start;
        if (size == 0) {
            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
            if (use_unified_memory) {
                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
            }
            continue;
        }
        const auto info = buffer_cache.UploadMemory(start, size);
        if (use_unified_memory) {
            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
                                   info.address + info.offset, size);
        } else {
            glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
        }
    }
}

void RasterizerOpenGL::SetupVertexInstances() {
void RasterizerOpenGL::SyncVertexInstances() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexInstances]) {
        return;

@@ -343,17 +252,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
    }
}

GLintptr RasterizerOpenGL::SetupIndexBuffer() {
    MICROPROFILE_SCOPE(OpenGL_Index);
    const auto& regs = maxwell3d.regs;
    const std::size_t size = CalculateIndexBufferSize();
    const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
    return info.offset;
}

void RasterizerOpenGL::SetupShaders() {
    MICROPROFILE_SCOPE(OpenGL_Shader);
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
    u32 clip_distances = 0;

    std::array<Shader*, Maxwell::MaxShaderStage> shaders{};

@@ -410,11 +309,19 @@ void RasterizerOpenGL::SetupShaders() {
        const size_t stage = index == 0 ? 0 : index - 1;
        shaders[stage] = shader;

        SetupDrawConstBuffers(stage, shader);
        SetupDrawGlobalMemory(stage, shader);
        SetupDrawTextures(shader, stage);
        SetupDrawImages(shader, stage);

        buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);

        buffer_cache.UnbindGraphicsStorageBuffers(stage);
        u32 ssbo_index = 0;
        for (const auto& buffer : shader->GetEntries().global_memory_entries) {
            buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
                                                   buffer.cbuf_offset, buffer.is_written);
            ++ssbo_index;
        }

        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the

@@ -430,43 +337,26 @@ void RasterizerOpenGL::SetupShaders() {
    SyncClipEnabled(clip_distances);
    maxwell3d.dirty.flags[Dirty::Shaders] = false;

    buffer_cache.UpdateGraphicsBuffers(is_indexed);

    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);

    buffer_cache.BindHostGeometryBuffers(is_indexed);

    size_t image_view_index = 0;
    size_t texture_index = 0;
    size_t image_index = 0;
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        const Shader* const shader = shaders[stage];
        if (shader) {
            const auto base = device.GetBaseBindings(stage);
            BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
                         texture_index, image_index);
        }
    }
}

std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
    const auto& regs = maxwell3d.regs;

    std::size_t size = 0;
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
        if (!regs.vertex_array[index].IsEnabled())
        if (!shader) {
            continue;

        const GPUVAddr start = regs.vertex_array[index].StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

        size += end - start;
        ASSERT(end >= start);
    }
        buffer_cache.BindHostStageBuffers(stage);
        const auto& base = device.GetBaseBindings(stage);
        BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
                     texture_index, image_index);
    }

    return size;
}

std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}

void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,

@@ -475,6 +365,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
}

void RasterizerOpenGL::Clear() {
    MICROPROFILE_SCOPE(OpenGL_Clears);
    if (!maxwell3d.ShouldExecute()) {
        return;
    }

@@ -525,11 +416,9 @@ void RasterizerOpenGL::Clear() {
    }
    UNIMPLEMENTED_IF(regs.clear_flags.viewport);

    {
        auto lock = texture_cache.AcquireLock();
        texture_cache.UpdateRenderTargets(true);
        state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
    }
    std::scoped_lock lock{texture_cache.mutex};
    texture_cache.UpdateRenderTargets(true);
    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());

    if (use_color) {
        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);

@@ -541,7 +430,6 @@ void RasterizerOpenGL::Clear() {
    } else if (use_stencil) {
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
    }

    ++num_queued_commands;
}
@@ -550,75 +438,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {

    query_cache.UpdateCounters();

    SyncViewport();
    SyncRasterizeEnable();
    SyncPolygonModes();
    SyncColorMask();
    SyncFragmentColorClampState();
    SyncMultiSampleState();
    SyncDepthTestState();
    SyncDepthClamp();
    SyncStencilTestState();
    SyncBlendState();
    SyncLogicOpState();
    SyncCullMode();
    SyncPrimitiveRestart();
    SyncScissorTest();
    SyncPointState();
    SyncLineState();
    SyncPolygonOffset();
    SyncAlphaTest();
    SyncFramebufferSRGB();

    buffer_cache.Acquire();
    current_cbuf = 0;

    std::size_t buffer_size = CalculateVertexArraysSize();

    // Add space for index buffer
    if (is_indexed) {
        buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
    }

    // Uniform space for the 5 shader stages
    buffer_size =
        Common::AlignUp<std::size_t>(buffer_size, 4) +
        (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;

    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers *
                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());

    // Prepare the vertex array.
    buffer_cache.Map(buffer_size);

    // Prepare vertex array format.
    SetupVertexFormat();

    // Upload vertex and index data.
    SetupVertexBuffer();
    SetupVertexInstances();
    GLintptr index_buffer_offset = 0;
    if (is_indexed) {
        index_buffer_offset = SetupIndexBuffer();
    }

    // Setup emulation uniform buffer.
    if (!device.UseAssemblyShaders()) {
        MaxwellUniformData ubo;
        ubo.SetFromRegs(maxwell3d);
        const auto info =
            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
                          static_cast<GLsizeiptr>(sizeof(ubo)));
    }
    SyncState();

    // Setup shaders and their used resources.
    auto lock = texture_cache.AcquireLock();
    SetupShaders();
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    SetupShaders(is_indexed);

    // Signal the buffer cache that we are not going to upload more things.
    buffer_cache.Unmap();
    texture_cache.UpdateRenderTargets(false);
    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
    program_manager.BindGraphicsPipeline();

@@ -632,7 +457,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    if (is_indexed) {
        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
        const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
        const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
        const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
        if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
            glDrawElements(primitive_mode, num_vertices, format, offset);

@@ -672,22 +497,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}

void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
    buffer_cache.Acquire();
    current_cbuf = 0;

    Shader* const kernel = shader_cache.GetComputeKernel(code_addr);

    auto lock = texture_cache.AcquireLock();
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    BindComputeTextures(kernel);

    const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
                               (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
    buffer_cache.Map(buffer_size);

    SetupComputeConstBuffers(kernel);
    SetupComputeGlobalMemory(kernel);

    buffer_cache.Unmap();
    const auto& entries = kernel->GetEntries();
    buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
    buffer_cache.UnbindComputeStorageBuffers();
    u32 ssbo_index = 0;
    for (const auto& buffer : entries.global_memory_entries) {
        buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
                                              buffer.is_written);
        ++ssbo_index;
    }
    buffer_cache.UpdateComputeBuffers();
    buffer_cache.BindHostComputeBuffers();

    const auto& launch_desc = kepler_compute.launch_description;
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);

@@ -703,6 +528,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
    query_cache.Query(gpu_addr, type, timestamp);
}

void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                 u32 size) {
    std::scoped_lock lock{buffer_cache.mutex};
    buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}

void RasterizerOpenGL::FlushAll() {}

void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {

@@ -711,19 +542,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
        return;
    }
    {
        auto lock = texture_cache.AcquireLock();
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.DownloadMemory(addr, size);
    }
    buffer_cache.FlushRegion(addr, size);
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.DownloadMemory(addr, size);
    }
    query_cache.FlushRegion(addr, size);
}

bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    if (!Settings::IsGPULevelHigh()) {
        return buffer_cache.MustFlushRegion(addr, size);
        return buffer_cache.IsRegionGpuModified(addr, size);
    }
    return texture_cache.IsRegionGpuModified(addr, size) ||
           buffer_cache.MustFlushRegion(addr, size);
           buffer_cache.IsRegionGpuModified(addr, size);
}

void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {

@@ -732,11 +567,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
        return;
    }
    {
        auto lock = texture_cache.AcquireLock();
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.WriteMemory(addr, size);
    }
    shader_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
    query_cache.InvalidateRegion(addr, size);
}

@@ -745,26 +583,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
    if (addr == 0 || size == 0) {
        return;
    }
    shader_cache.OnCPUWrite(addr, size);
    {
        auto lock = texture_cache.AcquireLock();
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
    shader_cache.OnCPUWrite(addr, size);
    buffer_cache.OnCPUWrite(addr, size);
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.CachedWriteMemory(addr, size);
    }
}

void RasterizerOpenGL::SyncGuestHost() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    buffer_cache.SyncGuestHost();
    shader_cache.SyncGuestHost();
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.FlushCachedWrites();
    }
}

void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
    {
        auto lock = texture_cache.AcquireLock();
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.UnmapMemory(addr, size);
    }
    buffer_cache.OnCPUWrite(addr, size);
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.WriteMemory(addr, size);
    }
    shader_cache.OnCPUWrite(addr, size);
}
@ -799,14 +646,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::WaitForIdle() {
|
||||
// Place a barrier on everything that is not framebuffer related.
|
||||
// This is related to another flag that is not currently implemented.
|
||||
glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
|
||||
GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
|
||||
GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
|
||||
GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
|
||||
GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
|
||||
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
|
||||
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
|
@ -831,18 +671,21 @@ void RasterizerOpenGL::TickFrame() {
|
|||
num_queued_commands = 0;
|
||||
|
||||
fence_manager.TickFrame();
|
||||
buffer_cache.TickFrame();
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.TickFrame();
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.TickFrame();
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.BlitImage(dst, src, copy_config);
|
||||
return true;
|
||||
}
|
||||
|
@ -854,7 +697,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
}
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
|
||||
if (!image_view) {
|
||||
return false;
|
||||
|
@ -921,166 +764,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
};
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& stages = maxwell3d.state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
const auto& entries = shader->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
|
||||
|
||||
const auto base_bindings = device.GetBaseBindings(stage_index);
|
||||
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const u32 index = entry.GetIndex();
|
||||
const auto& buffer = shader_stage.const_buffers[index];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
|
||||
base_unified_offset + index * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
|
||||
entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
|
||||
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
const auto& entries = kernel->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||
Tegra::Engines::ConstBufferInfo buffer;
|
||||
buffer.address = config.Address();
|
||||
buffer.size = config.size;
|
||||
buffer.enabled = mask[entry.GetIndex()];
|
||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
|
||||
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
|
||||
// UBO alignment requirements.
|
||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||
|
||||
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
|
||||
|
||||
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||
const GPUVAddr gpu_addr = buffer.address;
|
||||
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
UNIMPLEMENTED_IF(use_unified);
|
||||
if (info.offset != 0) {
|
||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
|
||||
info.handle = staging_cbuf;
|
||||
info.offset = 0;
|
||||
}
|
||||
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_unified) {
|
||||
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
|
||||
unified_offset, size);
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
|
||||
}
|
||||
}

void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
    static constexpr std::array TARGET_LUT = {
        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
    };
    const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
    const auto& entries{shader->GetEntries().global_memory_entries};

    std::array<BindlessSSBO, 32> ssbos;
    ASSERT(entries.size() < ssbos.size());

    const bool assembly_shaders = device.UseAssemblyShaders();
    u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
    for (const auto& entry : entries) {
        const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
        const u32 size{gpu_memory.Read<u32>(addr + 8)};
        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
        ++binding;
    }
    if (assembly_shaders) {
        UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
    }
}

void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
    const auto& entries{kernel->GetEntries().global_memory_entries};

    std::array<BindlessSSBO, 32> ssbos;
    ASSERT(entries.size() < ssbos.size());

    u32 binding = 0;
    for (const auto& entry : entries) {
        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
        const u32 size{gpu_memory.Read<u32>(addr + 8)};
        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
        ++binding;
    }
    if (device.UseAssemblyShaders()) {
        // Only the entries written above are valid; uploading all 32 slots would
        // feed uninitialized data to the driver.
        UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), entries.size());
    }
}

void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
                                         GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
    const size_t alignment{device.GetShaderStorageBufferAlignment()};
    const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
    if (device.UseAssemblyShaders()) {
        *ssbo = BindlessSSBO{
            .address = static_cast<GLuint64EXT>(info.address + info.offset),
            .length = static_cast<GLsizei>(size),
            .padding = 0,
        };
    } else {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
                          static_cast<GLsizeiptr>(size));
    }
}
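
For reference, the {address, size} pair the two loops above read from the guest constant buffer can be sketched as a struct; GlobalMemoryDescriptor is an illustrative name, not one from the source:

#include <cstddef>
#include <cstdint>

// The u64 GPU address occupies bytes 0-7 and the u32 size bytes 8-11, matching
// the Read<u64>(addr) / Read<u32>(addr + 8) pair above.
struct GlobalMemoryDescriptor {
    std::uint64_t gpu_address;
    std::uint32_t size;
};
static_assert(offsetof(GlobalMemoryDescriptor, size) == 8);
static_assert(sizeof(GlobalMemoryDescriptor) == 16); // tail-padded to u64 alignment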

void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
    const bool via_header_index =
        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;

@@ -1128,6 +811,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
    }
}

void RasterizerOpenGL::SyncState() {
    SyncViewport();
    SyncRasterizeEnable();
    SyncPolygonModes();
    SyncColorMask();
    SyncFragmentColorClampState();
    SyncMultiSampleState();
    SyncDepthTestState();
    SyncDepthClamp();
    SyncStencilTestState();
    SyncBlendState();
    SyncLogicOpState();
    SyncCullMode();
    SyncPrimitiveRestart();
    SyncScissorTest();
    SyncPointState();
    SyncLineState();
    SyncPolygonOffset();
    SyncAlphaTest();
    SyncFramebufferSRGB();
    SyncVertexFormats();
    SyncVertexInstances();
}

void RasterizerOpenGL::SyncViewport() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

@@ -1163,9 +870,11 @@ void RasterizerOpenGL::SyncViewport() {
        if (regs.screen_y_control.y_negate != 0) {
            flip_y = !flip_y;
        }
        glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
                      regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
                                                                       : GL_NEGATIVE_ONE_TO_ONE);
        const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
        const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
        const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
        state_tracker.ClipControl(origin, depth);
        state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
    }

    if (dirty_viewport) {

@@ -1649,36 +1358,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
    if (regs.tfb_enabled == 0) {
        return;
    }

    if (device.UseAssemblyShaders()) {
        SyncTransformFeedback();
    }

    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));

    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
        const auto& binding = regs.tfb_bindings[index];
        if (!binding.buffer_enable) {
            if (enabled_transform_feedback_buffers[index]) {
                glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
                                  0);
            }
            enabled_transform_feedback_buffers[index] = false;
            continue;
        }
        enabled_transform_feedback_buffers[index] = true;

        auto& tfb_buffer = transform_feedback_buffers[index];
        tfb_buffer.Create();

        const GLuint handle = tfb_buffer.handle;
        const std::size_t size = binding.buffer_size;
        glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
                          static_cast<GLsizeiptr>(size));
    }
    UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);

    // We may have to call BeginTransformFeedbackNV here since they seem to call different
    // implementations on Nvidia's driver (the pointer is different) but we are using

@@ -1692,23 +1378,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
    if (regs.tfb_enabled == 0) {
        return;
    }

    glEndTransformFeedback();

    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
        const auto& binding = regs.tfb_bindings[index];
        if (!binding.buffer_enable) {
            continue;
        }
        UNIMPLEMENTED_IF(binding.buffer_offset != 0);

        const GLuint handle = transform_feedback_buffers[index].handle;
        const GPUVAddr gpu_addr = binding.Address();
        const std::size_t size = binding.buffer_size;
        const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
        glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
                                 static_cast<GLsizeiptr>(size));
    }
}

} // namespace OpenGL

@@ -30,7 +30,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"

@@ -47,6 +46,11 @@ namespace Tegra {
class MemoryManager;
}

namespace Vulkan {
class Device;
class MemoryAllocator;
} // namespace Vulkan

namespace OpenGL {

struct ScreenInfo;

@@ -63,6 +67,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                              Core::Memory::Memory& cpu_memory_, const Device& device_,
                              const Vulkan::Device* vulkan_device,
                              Vulkan::MemoryAllocator* vulkan_memory_allocator,
                              ScreenInfo& screen_info_, ProgramManager& program_manager_,
                              StateTracker& state_tracker_);
    ~RasterizerOpenGL() override;

@@ -72,6 +78,7 @@ public:
    void DispatchCompute(GPUVAddr code_addr) override;
    void ResetCounter(VideoCore::QueryType type) override;
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
    bool MustFlushRegion(VAddr addr, u64 size) override;

@@ -119,27 +126,6 @@ private:
    void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
                      size_t& image_view_index, size_t& texture_index, size_t& image_index);

    /// Configures the current constbuffers to use for the draw command.
    void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);

    /// Configures the current constbuffers to use for the kernel invocation.
    void SetupComputeConstBuffers(Shader* kernel);

    /// Configures a constant buffer.
    void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
                          const ConstBufferEntry& entry, bool use_unified,
                          std::size_t unified_offset);

    /// Configures the current global memory entries to use for the draw command.
    void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);

    /// Configures the current global memory entries to use for the kernel invocation.
    void SetupComputeGlobalMemory(Shader* kernel);

    /// Configures a global memory buffer.
    void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
                           size_t size, BindlessSSBO* ssbo);

    /// Configures the current textures to use for the draw command.
    void SetupDrawTextures(const Shader* shader, size_t stage_index);

@@ -152,6 +138,9 @@ private:
    /// Configures images in a compute shader.
    void SetupComputeImages(const Shader* shader);

    /// Syncs state to match guest's
    void SyncState();

    /// Syncs the viewport and depth range to match the guest state
    void SyncViewport();

@@ -215,6 +204,12 @@ private:
    /// Syncs the framebuffer sRGB state to match the guest state
    void SyncFramebufferSRGB();

    /// Syncs vertex formats to match the guest state
    void SyncVertexFormats();

    /// Syncs vertex instances to match the guest state
    void SyncVertexInstances();

    /// Syncs transform feedback state to match guest state
    /// @note Only valid on assembly shaders
    void SyncTransformFeedback();

@@ -225,19 +220,7 @@ private:
    /// End a transform feedback
    void EndTransformFeedback();

    std::size_t CalculateVertexArraysSize() const;

    std::size_t CalculateIndexBufferSize() const;

    /// Updates the current vertex format
    void SetupVertexFormat();

    void SetupVertexBuffer();
    void SetupVertexInstances();

    GLintptr SetupIndexBuffer();

    void SetupShaders();
    void SetupShaders(bool is_indexed);

    Tegra::GPU& gpu;
    Tegra::Engines::Maxwell3D& maxwell3d;

@@ -249,12 +232,12 @@ private:
    ProgramManager& program_manager;
    StateTracker& state_tracker;

    OGLStreamBuffer stream_buffer;
    TextureCacheRuntime texture_cache_runtime;
    TextureCache texture_cache;
    BufferCacheRuntime buffer_cache_runtime;
    BufferCache buffer_cache;
    ShaderCacheOpenGL shader_cache;
    QueryCache query_cache;
    OGLBufferCache buffer_cache;
    FenceManagerOpenGL fence_manager;

    VideoCommon::Shader::AsyncShaders async_shaders;

@@ -262,20 +245,8 @@ private:
    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
    std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
    boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
    std::array<GLuint, MAX_TEXTURES> texture_handles;
    std::array<GLuint, MAX_IMAGES> image_handles;

    std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
        transform_feedback_buffers;
    std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
        enabled_transform_feedback_buffers;

    static constexpr std::size_t NUM_CONSTANT_BUFFERS =
        Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
        Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
    std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
    std::size_t current_cbuf = 0;
    OGLBuffer unified_uniform_buffer;
    std::array<GLuint, MAX_TEXTURES> texture_handles{};
    std::array<GLuint, MAX_IMAGES> image_handles{};

    /// Number of commands queued to the OpenGL driver. Reset on flush.
    std::size_t num_queued_commands = 0;

@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
    handle = 0;
}

void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
    ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });

    glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
}

void OGLSync::Create() {
    if (handle != 0)
        return;

@@ -234,9 +234,6 @@ public:
    /// Deletes the internal OpenGL resource
    void Release();

    // Converts the buffer into a stream copy buffer with a fixed size
    void MakeStreamCopy(std::size_t buffer_size);

    GLuint handle = 0;
};

@@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);

constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat

@@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{

const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f );

layout (std140, binding = {}) uniform vs_config {{
    float y_direction;
}};
)";

class ShaderWriter final {

@@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
    return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}

bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
    const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
    // We waste one UBO for emulation
    const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
    return num_ubos > num_available_ubos;
}
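
A worked example of the predicate above, with illustrative numbers: if a stage exposes 16 hardware UBO bindings and one is reserved for emulation, a shader using 18 guest constant buffers overflows the limit and takes the unified path:

constexpr unsigned num_ubos = 18;
constexpr unsigned num_available_ubos = 16 - 1; // one UBO wasted for emulation
static_assert(num_ubos > num_available_ubos);   // UseUnifiedUniforms -> true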

struct GenericVaryingDescription {
    std::string name;
    u8 first_element = 0;

@@ -420,9 +409,8 @@ public:
    explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                            ShaderType stage_, std::string_view identifier_,
                            std::string_view suffix_)
        : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
          suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
                                                       UseUnifiedUniforms(device_, ir_, stage_)} {
        : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
          identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
        if (stage != ShaderType::Compute) {
            transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
        }

@@ -516,7 +504,8 @@ private:
        if (!identifier.empty()) {
            code.AddLine("// {}", identifier);
        }
        code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
        const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
        code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
        code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
        if (device.HasShaderBallot()) {
            code.AddLine("#extension GL_ARB_shader_ballot : require");

@@ -542,7 +531,7 @@ private:

        code.AddNewLine();

        code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
        code.AddLine(COMMON_DECLARATIONS);
    }

    void DeclareVertex() {

@@ -865,17 +854,6 @@ private:
    }

    void DeclareConstantBuffers() {
        if (use_unified_uniforms) {
            const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
                                static_cast<u32>(ir.GetGlobalMemory().size());
            code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
                         binding);
            code.AddLine("    uint cbufs[];");
            code.AddLine("}};");
            code.AddNewLine();
            return;
        }

        u32 binding = device.GetBaseBindings(stage).uniform_buffer;
        for (const auto& [index, info] : ir.GetConstantBuffers()) {
            const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));

@@ -1081,29 +1059,17 @@ private:

        if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
            const Node offset = cbuf->GetOffset();
            const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;

            if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
                // Direct access
                const u32 offset_imm = immediate->GetValue();
                ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
                if (use_unified_uniforms) {
                    return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
                            Type::Uint};
                } else {
                    return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
                                        offset_imm / (4 * 4), (offset_imm / 4) % 4),
                            Type::Uint};
                }
            }

            // Indirect access
            if (use_unified_uniforms) {
                return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
                                    Visit(offset).AsUint()),
                return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
                                    offset_imm / (4 * 4), (offset_imm / 4) % 4),
                        Type::Uint};
            }

            // Indirect access
            const std::string final_offset = code.GenerateTemporary();
            code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());

@@ -2293,7 +2259,6 @@ private:
            }
        }
    }

    if (header.ps.omap.depth) {
        // The depth output is always 2 registers after the last color output, and current_reg
        // already contains one past the last color register.

@@ -2337,7 +2302,8 @@ private:
    }

    Expression YNegate(Operation operation) {
        return {"y_direction", Type::Float};
        // Y_NEGATE is mapped to this uniform value
        return {"gl_FrontMaterial.ambient.a", Type::Float};
    }

    template <u32 element>

@@ -2787,7 +2753,6 @@ private:
    const std::string_view identifier;
    const std::string_view suffix;
    const Header header;
    const bool use_unified_uniforms;
    std::unordered_map<u8, VaryingTFB> transform_feedback;

    ShaderWriter code;

@@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
    for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
        entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
    }
    for (const auto& buffer : entries.const_buffers) {
        entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
    }
    entries.shader_length = ir.GetLength();
    entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
    return entries;
}

@@ -55,7 +55,7 @@ struct ShaderEntries {
    std::vector<ImageEntry> images;
    std::size_t shader_length{};
    u32 clip_distances{};
    bool use_unified_uniforms{};
    u32 enabled_uniform_buffers{};
};

ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,

@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
    FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}

void SetupDirtyVertexArrays(Tables& tables) {
    static constexpr std::size_t num_array = 3;
void SetupDirtyVertexInstances(Tables& tables) {
    static constexpr std::size_t instance_base_offset = 3;
    for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
        const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
        const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);

        FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
        FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);

        const std::size_t instance_array_offset = array_offset + instance_base_offset;
        tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
        tables[1][instance_array_offset] = VertexInstances;

@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
    auto& dirty = gpu.Maxwell3D().dirty;
    auto& tables = dirty.tables;
    SetupDirtyRenderTargets(tables);
    SetupDirtyFlags(tables);
    SetupDirtyColorMasks(tables);
    SetupDirtyViewports(tables);
    SetupDirtyScissors(tables);
    SetupDirtyVertexArrays(tables);
    SetupDirtyVertexInstances(tables);
    SetupDirtyVertexFormat(tables);
    SetupDirtyShaders(tables);
    SetupDirtyPolygonModes(tables);

@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
    SetupDirtyClipControl(tables);
    SetupDirtyDepthClampEnabled(tables);
    SetupDirtyMisc(tables);

    auto& store = dirty.on_write_stores;
    store[VertexBuffers] = true;
    for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
        store[VertexBuffer0 + i] = true;
    }
}

void StateTracker::InvalidateStreamBuffer() {
    flags[Dirty::VertexBuffers] = true;
    for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
        flags[index] = true;
    }
}

} // namespace OpenGL

@@ -28,10 +28,6 @@ enum : u8 {
    VertexFormat0,
    VertexFormat31 = VertexFormat0 + 31,

    VertexBuffers,
    VertexBuffer0,
    VertexBuffer31 = VertexBuffer0 + 31,

    VertexInstances,
    VertexInstance0,
    VertexInstance31 = VertexInstance0 + 31,

@@ -92,8 +88,6 @@ class StateTracker {
public:
    explicit StateTracker(Tegra::GPU& gpu);

    void InvalidateStreamBuffer();

    void BindIndexBuffer(GLuint new_index_buffer) {
        if (index_buffer == new_index_buffer) {
            return;

@@ -110,13 +104,32 @@ public:
        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
    }

    void ClipControl(GLenum new_origin, GLenum new_depth) {
        if (new_origin == origin && new_depth == depth) {
            return;
        }
        origin = new_origin;
        depth = new_depth;
        glClipControl(origin, depth);
    }

    void SetYNegate(bool new_y_negate) {
        if (new_y_negate == y_negate) {
            return;
        }
        // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
        y_negate = new_y_negate;
        const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
        glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
    }

    void NotifyScreenDrawVertexArray() {
        flags[OpenGL::Dirty::VertexFormats] = true;
        flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
        flags[OpenGL::Dirty::VertexFormat0 + 1] = true;

        flags[OpenGL::Dirty::VertexBuffers] = true;
        flags[OpenGL::Dirty::VertexBuffer0] = true;
        flags[VideoCommon::Dirty::VertexBuffers] = true;
        flags[VideoCommon::Dirty::VertexBuffer0] = true;

        flags[OpenGL::Dirty::VertexInstances] = true;
        flags[OpenGL::Dirty::VertexInstance0 + 0] = true;

@@ -202,6 +215,9 @@ private:

    GLuint framebuffer = 0;
    GLuint index_buffer = 0;
    GLenum origin = GL_LOWER_LEFT;
    GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
    bool y_negate = false;
};

} // namespace OpenGL
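
A minimal sketch of how these cached setters elide redundant driver calls; SetPresentClipState is a hypothetical caller, not a function from this change:

void SetPresentClipState(OpenGL::StateTracker& tracker) {
    tracker.ClipControl(GL_UPPER_LEFT, GL_ZERO_TO_ONE); // reaches glClipControl
    tracker.ClipControl(GL_UPPER_LEFT, GL_ZERO_TO_ONE); // filtered, cached state matches
}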

@@ -1,70 +1,64 @@
// Copyright 2018 Citra Emulator Project
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <tuple>
#include <vector>
#include <array>
#include <memory>
#include <span>

#include <glad/glad.h>

#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"

MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
                    MP_RGB(128, 128, 192));

namespace OpenGL {

OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
    : state_tracker{state_tracker_} {
    gl_buffer.Create();

    static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
    glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
    mapped_ptr = static_cast<u8*>(
        glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));

    if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
StreamBuffer::StreamBuffer() {
    static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
    buffer.Create();
    glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
    glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
    mapped_pointer =
        static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
    for (OGLSync& sync : fences) {
        sync.Create();
    }
}

OGLStreamBuffer::~OGLStreamBuffer() {
    glUnmapNamedBuffer(gl_buffer.handle);
    gl_buffer.Release();
}

std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
    ASSERT(size <= BUFFER_SIZE);
    ASSERT(alignment <= BUFFER_SIZE);
    mapped_size = size;

    if (alignment > 0) {
        buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
    ASSERT(size < REGION_SIZE);
    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
         ++region) {
        fences[region].Create();
    }
    used_iterator = iterator;

    if (buffer_pos + size > BUFFER_SIZE) {
        MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
        glInvalidateBufferData(gl_buffer.handle);
        state_tracker.InvalidateStreamBuffer();

        buffer_pos = 0;
    for (size_t region = Region(free_iterator) + 1,
                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
         region < region_end; ++region) {
        glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
        fences[region].Release();
    }

    return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}

void OGLStreamBuffer::Unmap(GLsizeiptr size) {
    ASSERT(size <= mapped_size);

    if (size > 0) {
        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
    if (iterator + size > free_iterator) {
        free_iterator = iterator + size;
    }
    if (iterator + size > STREAM_BUFFER_SIZE) {
        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
            fences[region].Create();
        }
        used_iterator = 0;
        iterator = 0;
        free_iterator = size;

    buffer_pos += size;
        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
            glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
            fences[region].Release();
        }
    }
    const size_t offset = iterator;
    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
    return {std::span(mapped_pointer + offset, size), offset};
}

} // namespace OpenGL
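
The fence bookkeeping above maps byte offsets to sync regions with Region(); with the constants from the header below (a 64 MiB buffer split across 16 syncs), each region covers 4 MiB. A worked example using the same names:

#include <cstddef>

constexpr std::size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr std::size_t NUM_SYNCS = 16;
constexpr std::size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; // 4 MiB

constexpr std::size_t Region(std::size_t offset) {
    return offset / REGION_SIZE;
}
static_assert(Region(REGION_SIZE - 1) == 0); // offsets below 4 MiB share fence 0
static_assert(Region(5 * REGION_SIZE) == 5); // a 20 MiB offset waits on fence 5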

@@ -1,9 +1,12 @@
// Copyright 2018 Citra Emulator Project
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <memory>
#include <span>
#include <utility>

#include <glad/glad.h>

@@ -13,48 +16,35 @@

namespace OpenGL {

class Device;
class StateTracker;
class StreamBuffer {
    static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
    static constexpr size_t NUM_SYNCS = 16;
    static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
    static constexpr size_t MAX_ALIGNMENT = 256;
    static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
    static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
    static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);

class OGLStreamBuffer : private NonCopyable {
public:
    explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
    ~OGLStreamBuffer();
    explicit StreamBuffer();

    /*
     * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
     * and the optional alignment requirement.
     * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
     * The return values are the pointer to the new chunk, and the offset within the buffer.
     * The actual used size must be specified on unmapping the chunk.
     */
    std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
    [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;

    void Unmap(GLsizeiptr size);

    GLuint Handle() const {
        return gl_buffer.handle;
    }

    u64 Address() const {
        return gpu_address;
    }

    GLsizeiptr Size() const noexcept {
        return BUFFER_SIZE;
    [[nodiscard]] GLuint Handle() const noexcept {
        return buffer.handle;
    }

private:
    static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
    [[nodiscard]] static size_t Region(size_t offset) noexcept {
        return offset / REGION_SIZE;
    }

    StateTracker& state_tracker;

    OGLBuffer gl_buffer;

    GLuint64EXT gpu_address = 0;
    GLintptr buffer_pos = 0;
    GLsizeiptr mapped_size = 0;
    u8* mapped_ptr = nullptr;
    size_t iterator = 0;
    size_t used_iterator = 0;
    size_t free_iterator = 0;
    u8* mapped_pointer = nullptr;
    OGLBuffer buffer;
    std::array<OGLSync, NUM_SYNCS> fences;
};

} // namespace OpenGL
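
How a caller might drive this class, as a sketch; UploadVertices and the stride are illustrative, and the memcpy into the returned span needs no explicit flush because the mapping is persistent and coherent:

#include <cstdint>
#include <cstring>
#include <span>

#include <glad/glad.h>

void UploadVertices(OpenGL::StreamBuffer& stream, std::span<const std::uint8_t> vertex_data) {
    const auto [span, offset] = stream.Request(vertex_data.size());
    std::memcpy(span.data(), vertex_data.data(), vertex_data.size());
    glBindVertexBuffer(0, stream.Handle(), static_cast<GLintptr>(offset),
                       /*stride=*/16); // illustrative stride
}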

@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {

} // Anonymous namespace

ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
    : span(map, size), sync{sync_}, handle{handle_} {}

ImageBufferMap::~ImageBufferMap() {
    if (sync) {
        sync->Create();

@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
    glFinish();
}

ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
    return upload_buffers.RequestMap(size, true);
}

ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
    return download_buffers.RequestMap(size, false);
}

@@ -596,7 +593,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
                                                               bool insert_fence) {
    const size_t index = RequestBuffer(requested_size);
    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
    return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
    return ImageBufferMap{
        .mapped_span = std::span(maps[index], requested_size),
        .sync = sync,
        .buffer = buffers[index].handle,
    };
}

size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {

@@ -711,7 +712,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,

void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                         std::span<const VideoCommon::BufferImageCopy> copies) {
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);

    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

@@ -735,7 +736,7 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                         std::span<const VideoCommon::BufferCopy> copies) {
    for (const VideoCommon::BufferCopy& copy : copies) {
        glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
        glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset,
                                 copy.dst_offset, copy.size);
    }
}

@@ -744,7 +745,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
                           std::span<const VideoCommon::BufferImageCopy> copies) {
    glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API

    glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
    glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
    glPixelStorei(GL_PACK_ALIGNMENT, 1);

    u32 current_row_length = std::numeric_limits<u32>::max();

@@ -31,23 +31,12 @@ using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;

class ImageBufferMap {
public:
    explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
struct ImageBufferMap {
    ~ImageBufferMap();

    GLuint Handle() const noexcept {
        return handle;
    }

    std::span<u8> Span() const noexcept {
        return span;
    }

private:
    std::span<u8> span;
    std::span<u8> mapped_span;
    OGLSync* sync;
    GLuint handle;
    GLuint buffer;
};

struct FormatProperties {

@@ -69,9 +58,9 @@ public:

    void Finish();

    ImageBufferMap MapUploadBuffer(size_t size);
    ImageBufferMap UploadStagingBuffer(size_t size);

    ImageBufferMap MapDownloadBuffer(size_t size);
    ImageBufferMap DownloadStagingBuffer(size_t size);

    void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);

@@ -27,11 +27,14 @@
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_debug_callback.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"

namespace OpenGL {

namespace {

constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;

@@ -125,25 +128,98 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
    }
}

Vulkan::vk::PhysicalDevice FindPhysicalDevice(Vulkan::vk::Instance& instance) {
    using namespace Vulkan;
    using UUID = std::array<GLubyte, GL_UUID_SIZE_EXT>;

    GLint num_device_uuids;
    glGetIntegerv(GL_NUM_DEVICE_UUIDS_EXT, &num_device_uuids);
    std::vector<UUID> device_uuids(num_device_uuids);
    for (GLint index = 0; index < num_device_uuids; ++index) {
        glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, index, device_uuids[index].data());
    }
    UUID driver_uuid;
    glGetUnsignedBytevEXT(GL_DRIVER_UUID_EXT, driver_uuid.data());

    for (const VkPhysicalDevice raw_physical_device : instance.EnumeratePhysicalDevices()) {
        VkPhysicalDeviceIDProperties device_id_properties{};
        device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;

        VkPhysicalDeviceProperties2KHR properties{
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
            .pNext = &device_id_properties,
            .properties{},
        };
        vk::PhysicalDevice physical_device(raw_physical_device, instance.Dispatch());
        physical_device.GetProperties2KHR(properties);
        if (!std::ranges::equal(device_id_properties.driverUUID, driver_uuid)) {
            continue;
        }
        const auto it =
            std::ranges::find_if(device_uuids, [&device_id_properties](const UUID& uuid) {
                return std::ranges::equal(device_id_properties.deviceUUID, uuid);
            });
        if (it != device_uuids.end()) {
            return physical_device;
        }
    }
    throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
} // Anonymous namespace

struct VulkanObjects {
    static std::unique_ptr<VulkanObjects> TryCreate() {
        if (!GLAD_GL_EXT_memory_object) {
            // Interop is not present
            return nullptr;
        }
        const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
        if (vendor == "ATI Technologies Inc.") {
            // Avoid using GL_EXT_memory_object on AMD, as it makes the GL driver crash
            return nullptr;
        }
        if (!Settings::values.use_assembly_shaders.GetValue()) {
            // We only need interop when assembly shaders are enabled
            return nullptr;
        }
#ifdef __linux__
        LOG_WARNING(Render_OpenGL, "Interop doesn't work on Linux at the moment");
        return nullptr;
#endif
        try {
            return std::make_unique<VulkanObjects>();
        } catch (const Vulkan::vk::Exception& exception) {
            LOG_ERROR(Render_OpenGL, "Failed to initialize Vulkan objects with error: {}",
                      exception.what());
            return nullptr;
        }
    }

    Common::DynamicLibrary library{Vulkan::OpenLibrary()};
    Vulkan::vk::InstanceDispatch dld;
    Vulkan::vk::Instance instance{Vulkan::CreateInstance(library, dld, VK_API_VERSION_1_1)};
    Vulkan::Device device{*instance, FindPhysicalDevice(instance), nullptr, dld};
    Vulkan::MemoryAllocator memory_allocator{device, true};
};

RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
                               Core::Frontend::EmuWindow& emu_window_,
                               Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
                               std::unique_ptr<Core::Frontend::GraphicsContext> context_)
    : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
      emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device},
      rasterizer{emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker} {
      emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_},
      vulkan_objects{VulkanObjects::TryCreate()}, device{vulkan_objects != nullptr},
      state_tracker{gpu}, program_manager{device},
      rasterizer(emu_window, gpu, cpu_memory, device,
                 vulkan_objects ? &vulkan_objects->device : nullptr,
                 vulkan_objects ? &vulkan_objects->memory_allocator : nullptr, screen_info,
                 program_manager, state_tracker) {
    if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
        glEnable(GL_DEBUG_OUTPUT);
        glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
        glDebugMessageCallback(DebugHandler, nullptr);
    }
    AddTelemetryFields();

    if (!GLAD_GL_VERSION_4_6) {
        throw std::runtime_error{"OpenGL 4.6 is not available"};
    }
    InitOpenGLObjects();
}

@@ -280,6 +356,7 @@ void RendererOpenGL::InitOpenGLObjects() {
    // Enable unified vertex attributes and query vertex buffer address when the driver supports it
    if (device.HasVertexBufferUnifiedMemory()) {
        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
        glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);

        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,

@@ -412,6 +489,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {

    program_manager.BindHostPipeline(pipeline.handle);

    state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
    glEnable(GL_CULL_FACE);
    if (screen_info.display_srgb) {
        glEnable(GL_FRAMEBUFFER_SRGB);

@@ -430,7 +508,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    glCullFace(GL_BACK);
    glFrontFace(GL_CW);
    glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
    glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
                       static_cast<GLfloat>(layout.height));
    glDepthRangeIndexed(0, 0.0, 0.0);

@@ -38,6 +38,8 @@ class GPU;

namespace OpenGL {

struct VulkanObjects;

/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
    OGLTexture resource;

@@ -99,8 +101,11 @@ private:
    Core::Memory::Memory& cpu_memory;
    Tegra::GPU& gpu;

    const Device device;
    StateTracker state_tracker{gpu};
    std::unique_ptr<VulkanObjects> vulkan_objects;
    Device device;
    StateTracker state_tracker;
    ProgramManager program_manager;
    RasterizerOpenGL rasterizer;

    // OpenGL object IDs
    OGLSampler present_sampler;

@@ -116,11 +121,6 @@ private:
    /// Display information for Switch screen
    ScreenInfo screen_info;

    /// Global dummy shader pipeline
    ProgramManager program_manager;

    RasterizerOpenGL rasterizer;

    /// OpenGL framebuffer data
    std::vector<u8> gl_framebuffer_data;

@@ -71,7 +71,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

    const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));

@@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
        glUniform1ui(5, params.x_shift);
        glUniform1ui(6, params.block_height);
        glUniform1ui(7, params.block_height_mask);
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
                          image.guest_size_bytes - swizzle.buffer_offset);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, store_format);
        glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);

@@ -108,7 +108,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
    program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

@@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
        glUniform1ui(7, params.block_height_mask);
        glUniform1ui(8, params.block_depth);
        glUniform1ui(9, params.block_depth_mask);
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
                          image.guest_size_bytes - swizzle.buffer_offset);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, store_format);
        glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);

@@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
                   "Non-power of two images are not implemented");

    program_manager.BindHostCompute(pitch_unswizzle_program.handle);
    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
    glUniform2ui(LOC_ORIGIN, 0, 0);
    glUniform2i(LOC_DESTINATION, 0, 0);
    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);

@@ -172,8 +172,8 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
        const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
        const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);

        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
                          image.guest_size_bytes - swizzle.buffer_offset);
        glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
    }
    program_manager.RestoreGuestCompute();

@@ -15,9 +15,10 @@
namespace OpenGL {

class Image;
class ImageBufferMap;
class ProgramManager;

struct ImageBufferMap;

class UtilShaders {
public:
    explicit UtilShaders(ProgramManager& program_manager);