Remove references to PICA and rasterizers in video_core

This commit is contained in:
James Rowe 2018-01-11 20:07:44 -07:00
parent ebf9a784a9
commit 1d28b2e142
77 changed files with 4 additions and 16444 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,316 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>
#include <glad/glad.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
#include "video_core/shader/shader.h"
struct ScreenInfo;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
RasterizerOpenGL();
~RasterizerOpenGL() override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
void FlushRegion(PAddr addr, u64 size) override;
void FlushAndInvalidateRegion(PAddr addr, u64 size) override;
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
/// OpenGL shader generated for a given Pica register state
struct PicaShader {
/// OpenGL shader resource
OGLShader shader;
};
private:
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
OGLSampler sampler;
/// Creates the sampler object, initializing its state so that it's in sync with the
/// SamplerInfo struct.
void Create();
/// Syncs the sampler object with the config, updating any necessary state.
void SyncWithConfig(const TextureConfig& config);
private:
TextureConfig::TextureFilter mag_filter;
TextureConfig::TextureFilter min_filter;
TextureConfig::WrapMode wrap_s;
TextureConfig::WrapMode wrap_t;
u32 border_color;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
position[3] = v.pos.w.ToFloat32();
color[0] = v.color.x.ToFloat32();
color[1] = v.color.y.ToFloat32();
color[2] = v.color.z.ToFloat32();
color[3] = v.color.w.ToFloat32();
tex_coord0[0] = v.tc0.x.ToFloat32();
tex_coord0[1] = v.tc0.y.ToFloat32();
tex_coord1[0] = v.tc1.x.ToFloat32();
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
normquat[3] = v.quat.w.ToFloat32();
view[0] = v.view.x.ToFloat32();
view[1] = v.view.y.ToFloat32();
view[2] = v.view.z.ToFloat32();
if (flip_quaternion) {
for (float& x : normquat) {
x = -x;
}
}
}
GLfloat position[4];
GLfloat color[4];
GLfloat tex_coord0[2];
GLfloat tex_coord1[2];
GLfloat tex_coord2[2];
GLfloat tex_coord0_w;
GLfloat normquat[4];
GLfloat view[3];
};
struct LightSrc {
alignas(16) GLvec3 specular_0;
alignas(16) GLvec3 specular_1;
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
alignas(16) GLvec3 spot_direction; // negated
GLfloat dist_atten_bias;
GLfloat dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct UniformData {
alignas(8) GLvec2 framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
GLint scissor_x1;
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
alignas(8) GLvec2 proctex_noise_a;
alignas(8) GLvec2 proctex_noise_p;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
alignas(16) GLvec4 clip_coef;
};
static_assert(
sizeof(UniformData) == 0x470,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
/// Syncs the blend functions to match the PICA register
void SyncBlendFuncs();
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
void SyncFogLUT();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture lookup tables
void SyncProcTexNoiseLUT();
void SyncProcTexColorMap();
void SyncProcTexAlphaMap();
void SyncProcTexLUT();
void SyncProcTexDiffLUT();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
/// Syncs the color write mask to match the PICA register state
void SyncColorWriteMask();
/// Syncs the stencil write mask to match the PICA register state
void SyncStencilWriteMask();
/// Syncs the depth write mask to match the PICA register state
void SyncDepthWriteMask();
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(int tev_index, const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the lighting lookup tables
void SyncLightingLUT(unsigned index);
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direcition to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
OpenGLState state;
RasterizerCacheOpenGL res_cache;
std::vector<HardwareVertex> vertex_batch;
std::unordered_map<GLShader::PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
const PicaShader* current_shader = nullptr;
bool shader_dirty;
struct {
UniformData data;
std::array<bool, Pica::LightingRegs::NumLightingSampler> lut_dirty;
bool fog_lut_dirty;
bool proctex_noise_lut_dirty;
bool proctex_color_map_dirty;
bool proctex_alpha_map_dirty;
bool proctex_lut_dirty;
bool proctex_diff_lut_dirty;
bool dirty;
} uniform_block_data = {};
std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
OGLBuffer vertex_buffer;
OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer;
OGLBuffer lighting_lut_buffer;
OGLTexture lighting_lut;
std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
OGLBuffer fog_lut_buffer;
OGLTexture fog_lut;
std::array<GLvec2, 128> fog_lut_data{};
OGLBuffer proctex_noise_lut_buffer;
OGLTexture proctex_noise_lut;
std::array<GLvec2, 128> proctex_noise_lut_data{};
OGLBuffer proctex_color_map_buffer;
OGLTexture proctex_color_map;
std::array<GLvec2, 128> proctex_color_map_data{};
OGLBuffer proctex_alpha_map_buffer;
OGLTexture proctex_alpha_map;
std::array<GLvec2, 128> proctex_alpha_map_data{};
OGLBuffer proctex_lut_buffer;
OGLTexture proctex_lut;
std::array<GLvec4, 256> proctex_lut_data{};
OGLBuffer proctex_diff_lut_buffer;
OGLTexture proctex_diff_lut;
std::array<GLvec4, 256> proctex_diff_lut_data{};
};

View file

@ -1,799 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <atomic>
#include <cstring>
#include <iterator>
#include <unordered_set>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/bit_field.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/texture/texture_decode.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
};
static const std::array<FormatTuple, 5> fb_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8
{GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
}};
static const std::array<FormatTuple, 4> depth_format_tuples = {{
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
{},
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
}};
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
transfer_framebuffers[0].Create();
transfer_framebuffers[1].Create();
}
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
FlushAll();
}
static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height,
u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data,
u8* gl_data, bool morton_to_gl) {
using PixelFormat = CachedSurface::PixelFormat;
u8* data_ptrs[2];
u32 depth_stencil_shifts[2] = {24, 8};
if (morton_to_gl) {
std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
}
if (pixel_format == PixelFormat::D24S8) {
for (unsigned y = 0; y < height; ++y) {
for (unsigned x = 0; x < width; ++x) {
const u32 coarse_y = y & ~7;
u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
coarse_y * width * bytes_per_pixel;
u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
data_ptrs[morton_to_gl] = morton_data + morton_offset;
data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
// Swap depth and stencil value ordering since 3DS does not match OpenGL
u32 depth_stencil;
memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
depth_stencil = (depth_stencil << depth_stencil_shifts[0]) |
(depth_stencil >> depth_stencil_shifts[1]);
memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
}
}
} else {
for (unsigned y = 0; y < height; ++y) {
for (unsigned x = 0; x < width; ++x) {
const u32 coarse_y = y & ~7;
u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
coarse_y * width * bytes_per_pixel;
u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
data_ptrs[morton_to_gl] = morton_data + morton_offset;
data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
}
}
}
}
void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex,
CachedSurface::SurfaceType type,
const MathUtil::Rectangle<int>& src_rect,
const MathUtil::Rectangle<int>& dst_rect) {
using SurfaceType = CachedSurface::SurfaceType;
OpenGLState cur_state = OpenGLState::GetCurState();
// Make sure textures aren't bound to texture units, since going to bind them to framebuffer
// components
OpenGLState::ResetTexture(src_tex);
OpenGLState::ResetTexture(dst_tex);
// Keep track of previous framebuffer bindings
GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer};
cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
cur_state.Apply();
u32 buffers = 0;
if (type == SurfaceType::Color || type == SurfaceType::Texture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
buffers = GL_COLOR_BUFFER_BIT;
} else if (type == SurfaceType::Depth) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
buffers = GL_DEPTH_BUFFER_BIT;
} else if (type == SurfaceType::DepthStencil) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
src_tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
dst_tex, 0);
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
// Restore previous framebuffer bindings
cur_state.draw.read_framebuffer = old_fbs[0];
cur_state.draw.draw_framebuffer = old_fbs[1];
cur_state.Apply();
}
bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
const MathUtil::Rectangle<int>& src_rect,
CachedSurface* dst_surface,
const MathUtil::Rectangle<int>& dst_rect) {
if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format,
dst_surface->pixel_format)) {
return false;
}
BlitTextures(src_surface->texture.handle, dst_surface->texture.handle,
CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
return true;
}
static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format,
u32 width, u32 height) {
// Allocate an uninitialized texture of appropriate size and format for the surface
using SurfaceType = CachedSurface::SurfaceType;
OpenGLState cur_state = OpenGLState::GetCurState();
// Keep track of previous texture bindings
GLuint old_tex = cur_state.texture_units[0].texture_2d;
cur_state.texture_units[0].texture_2d = texture;
cur_state.Apply();
glActiveTexture(GL_TEXTURE0);
SurfaceType type = CachedSurface::GetFormatType(pixel_format);
FormatTuple tuple;
if (type == SurfaceType::Color) {
ASSERT((size_t)pixel_format < fb_format_tuples.size());
tuple = fb_format_tuples[(unsigned int)pixel_format];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
size_t tuple_idx = (size_t)pixel_format - 14;
ASSERT(tuple_idx < depth_format_tuples.size());
tuple = depth_format_tuples[tuple_idx];
} else {
tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
}
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format,
tuple.type, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Restore previous texture bindings
cur_state.texture_units[0].texture_2d = old_tex;
cur_state.Apply();
}
MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale,
bool load_if_create) {
using PixelFormat = CachedSurface::PixelFormat;
using SurfaceType = CachedSurface::SurfaceType;
if (params.addr == 0) {
return nullptr;
}
u32 params_size =
params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
// Check for an exact match in existing surfaces
CachedSurface* best_exact_surface = nullptr;
float exact_surface_goodness = -1.f;
auto surface_interval =
boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
auto range = surface_cache.equal_range(surface_interval);
for (auto it = range.first; it != range.second; ++it) {
for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
CachedSurface* surface = it2->get();
// Check if the request matches the surface exactly
if (params.addr == surface->addr && params.width == surface->width &&
params.height == surface->height && params.pixel_format == surface->pixel_format) {
// Make sure optional param-matching criteria are fulfilled
bool tiling_match = (params.is_tiled == surface->is_tiled);
bool res_scale_match = (params.res_scale_width == surface->res_scale_width &&
params.res_scale_height == surface->res_scale_height);
if (!match_res_scale || res_scale_match) {
// Prioritize same-tiling and highest resolution surfaces
float match_goodness =
(float)tiling_match + surface->res_scale_width * surface->res_scale_height;
if (match_goodness > exact_surface_goodness || surface->dirty) {
exact_surface_goodness = match_goodness;
best_exact_surface = surface;
}
}
}
}
}
// Return the best exact surface if found
if (best_exact_surface != nullptr) {
return best_exact_surface;
}
// No matching surfaces found, so create a new one
u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
if (texture_src_data == nullptr) {
return nullptr;
}
MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
// Stride only applies to linear images.
ASSERT(params.pixel_stride == 0 || !params.is_tiled);
std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
new_surface->addr = params.addr;
new_surface->size = params_size;
new_surface->texture.Create();
new_surface->width = params.width;
new_surface->height = params.height;
new_surface->pixel_stride = params.pixel_stride;
new_surface->res_scale_width = params.res_scale_width;
new_surface->res_scale_height = params.res_scale_height;
new_surface->is_tiled = params.is_tiled;
new_surface->pixel_format = params.pixel_format;
new_surface->dirty = false;
if (!load_if_create) {
// Don't load any data; just allocate the surface's texture
AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format,
new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
} else {
// TODO: Consider attempting subrect match in existing surfaces and direct blit here instead
// of memory upload below if that's a common scenario in some game
Memory::RasterizerFlushRegion(params.addr, params_size);
// Load data from memory to the new surface
OpenGLState cur_state = OpenGLState::GetCurState();
GLuint old_tex = cur_state.texture_units[0].texture_2d;
cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
cur_state.Apply();
glActiveTexture(GL_TEXTURE0);
if (!new_surface->is_tiled) {
// TODO: Ensure this will always be a color format, not a depth or other format
ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride);
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
tuple.format, tuple.type, texture_src_data);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
} else {
SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
FormatTuple tuple;
if ((size_t)params.pixel_format < fb_format_tuples.size()) {
tuple = fb_format_tuples[(unsigned int)params.pixel_format];
} else {
// Texture
tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
}
std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
Pica::Texture::TextureInfo tex_info;
tex_info.width = params.width;
tex_info.height = params.height;
tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format;
tex_info.SetDefaultStride();
tex_info.physical_address = params.addr;
for (unsigned y = 0; y < params.height; ++y) {
for (unsigned x = 0; x < params.width; ++x) {
tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture(
texture_src_data, x, params.height - 1 - y, tex_info);
}
}
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
} else {
// Depth/Stencil formats need special treatment since they aren't sampleable using
// LookupTexture and can't use RGBA format
size_t tuple_idx = (size_t)params.pixel_format - 14;
ASSERT(tuple_idx < depth_format_tuples.size());
const FormatTuple& tuple = depth_format_tuples[tuple_idx];
u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
bool use_4bpp = (params.pixel_format == PixelFormat::D24);
u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
std::vector<u8> temp_fb_depth_buffer(params.width * params.height *
gl_bytes_per_pixel);
u8* temp_fb_depth_buffer_ptr =
use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel,
gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr,
true);
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
0, tuple.format, tuple.type, temp_fb_depth_buffer.data());
}
}
// If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
OGLTexture scaled_texture;
scaled_texture.Create();
AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format,
new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
BlitTextures(new_surface->texture.handle, scaled_texture.handle,
CachedSurface::GetFormatType(new_surface->pixel_format),
MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(),
new_surface->GetScaledHeight()));
new_surface->texture.Release();
new_surface->texture.handle = scaled_texture.handle;
scaled_texture.handle = 0;
cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
cur_state.Apply();
}
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
cur_state.texture_units[0].texture_2d = old_tex;
cur_state.Apply();
}
Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(
new_surface->addr, new_surface->addr + new_surface->size),
std::set<std::shared_ptr<CachedSurface>>({new_surface})));
return new_surface.get();
}
CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params,
bool match_res_scale, bool load_if_create,
MathUtil::Rectangle<int>& out_rect) {
if (params.addr == 0) {
return nullptr;
}
u32 total_pixels = params.width * params.height;
u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
// Attempt to find encompassing surfaces
CachedSurface* best_subrect_surface = nullptr;
float subrect_surface_goodness = -1.f;
auto surface_interval =
boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
CachedSurface* surface = it2->get();
// Check if the request is contained in the surface
if (params.addr >= surface->addr &&
params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
params.pixel_format == surface->pixel_format) {
// Make sure optional param-matching criteria are fulfilled
bool tiling_match = (params.is_tiled == surface->is_tiled);
bool res_scale_match = (params.res_scale_width == surface->res_scale_width &&
params.res_scale_height == surface->res_scale_height);
if (!match_res_scale || res_scale_match) {
// Prioritize same-tiling and highest resolution surfaces
float match_goodness =
(float)tiling_match + surface->res_scale_width * surface->res_scale_height;
if (match_goodness > subrect_surface_goodness || surface->dirty) {
subrect_surface_goodness = match_goodness;
best_subrect_surface = surface;
}
}
}
}
}
// Return the best subrect surface if found
if (best_subrect_surface != nullptr) {
unsigned int bytes_per_pixel =
(CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
int x0, y0;
if (!params.is_tiled) {
u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
x0 = begin_pixel_index % best_subrect_surface->width;
y0 = begin_pixel_index / best_subrect_surface->width;
out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
} else {
u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
u32 tiles_per_row = best_subrect_surface->width / 8;
u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
x0 = begin_tile_index % tiles_per_row * 8;
y0 = begin_tile_index / tiles_per_row * 8;
// Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
out_rect =
MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width,
best_subrect_surface->height - (y0 + params.height));
}
out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
return best_subrect_surface;
}
// No subrect found - create and return a new surface
if (!params.is_tiled) {
out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width),
(int)(params.height * params.res_scale_height));
} else {
out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height),
(int)(params.width * params.res_scale_width), 0);
}
return GetSurface(params, match_res_scale, load_if_create);
}
CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(
const Pica::TexturingRegs::FullTextureConfig& config) {
Pica::Texture::TextureInfo info =
Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format);
CachedSurface params;
params.addr = info.physical_address;
params.width = info.width;
params.height = info.height;
params.is_tiled = true;
params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
return GetSurface(params, false, true);
}
std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>>
RasterizerCacheOpenGL::GetFramebufferSurfaces(
const Pica::FramebufferRegs::FramebufferConfig& config) {
const auto& regs = Pica::g_state.regs;
// Make sur that framebuffers don't overlap if both color and depth are being used
u32 fb_area = config.GetWidth() * config.GetHeight();
bool framebuffers_overlap =
config.GetColorBufferPhysicalAddress() != 0 &&
config.GetDepthBufferPhysicalAddress() != 0 &&
MathUtil::IntervalsIntersect(
config.GetColorBufferPhysicalAddress(),
fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
config.GetDepthBufferPhysicalAddress(),
fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format));
bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
bool depth_write_enable = regs.framebuffer.output_merger.depth_write_enable &&
regs.framebuffer.framebuffer.allow_depth_stencil_write;
bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 &&
(regs.framebuffer.output_merger.depth_test_enable || depth_write_enable ||
!framebuffers_overlap);
if (framebuffers_overlap && using_color_fb && using_depth_fb) {
LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
"overlapping framebuffers not supported!");
using_depth_fb = false;
}
// get color and depth surfaces
CachedSurface color_params;
CachedSurface depth_params;
color_params.width = depth_params.width = config.GetWidth();
color_params.height = depth_params.height = config.GetHeight();
color_params.is_tiled = depth_params.is_tiled = true;
// Set the internal resolution, assume the same scaling factor for top and bottom screens
float resolution_scale_factor = Settings::values.resolution_factor;
if (resolution_scale_factor == 0.0f) {
// Auto - scale resolution to the window size
resolution_scale_factor = VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio();
}
// Scale the resolution by the specified factor
color_params.res_scale_width = resolution_scale_factor;
depth_params.res_scale_width = resolution_scale_factor;
color_params.res_scale_height = resolution_scale_factor;
depth_params.res_scale_height = resolution_scale_factor;
color_params.addr = config.GetColorBufferPhysicalAddress();
color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
depth_params.addr = config.GetDepthBufferPhysicalAddress();
depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
MathUtil::Rectangle<int> color_rect;
CachedSurface* color_surface =
using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
MathUtil::Rectangle<int> depth_rect;
CachedSurface* depth_surface =
using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
// Sanity check to make sure found surfaces aren't the same
if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
LOG_CRITICAL(
Render_OpenGL,
"Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
using_depth_fb = false;
depth_surface = nullptr;
}
MathUtil::Rectangle<int> rect;
if (color_surface != nullptr && depth_surface != nullptr &&
(depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
// Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if
// they don't match
if (color_rect.left != 0 || color_rect.top != 0) {
color_surface = GetSurface(color_params, true, true);
}
if (depth_rect.left != 0 || depth_rect.top != 0) {
depth_surface = GetSurface(depth_params, true, true);
}
if (!color_surface->is_tiled) {
rect = MathUtil::Rectangle<int>(
0, 0, (int)(color_params.width * color_params.res_scale_width),
(int)(color_params.height * color_params.res_scale_height));
} else {
rect = MathUtil::Rectangle<int>(
0, (int)(color_params.height * color_params.res_scale_height),
(int)(color_params.width * color_params.res_scale_width), 0);
}
} else if (color_surface != nullptr) {
rect = color_rect;
} else if (depth_surface != nullptr) {
rect = depth_rect;
} else {
rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
}
return std::make_tuple(color_surface, depth_surface, rect);
}
CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
auto surface_interval =
boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
auto range = surface_cache.equal_range(surface_interval);
for (auto it = range.first; it != range.second; ++it) {
for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
int bits_per_value = 0;
if (config.fill_24bit) {
bits_per_value = 24;
} else if (config.fill_32bit) {
bits_per_value = 32;
} else {
bits_per_value = 16;
}
CachedSurface* surface = it2->get();
if (surface->addr == config.GetStartAddress() &&
CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
(surface->width * surface->height *
CachedSurface::GetFormatBpp(surface->pixel_format) / 8) ==
(config.GetEndAddress() - config.GetStartAddress())) {
return surface;
}
}
}
return nullptr;
}
MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
using PixelFormat = CachedSurface::PixelFormat;
using SurfaceType = CachedSurface::SurfaceType;
if (!surface->dirty) {
return;
}
MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
if (dst_buffer == nullptr) {
return;
}
OpenGLState cur_state = OpenGLState::GetCurState();
GLuint old_tex = cur_state.texture_units[0].texture_2d;
OGLTexture unscaled_tex;
GLuint texture_to_flush = surface->texture.handle;
// If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
unscaled_tex.Create();
AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width,
surface->height);
BlitTextures(
surface->texture.handle, unscaled_tex.handle,
CachedSurface::GetFormatType(surface->pixel_format),
MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
texture_to_flush = unscaled_tex.handle;
}
cur_state.texture_units[0].texture_2d = texture_to_flush;
cur_state.Apply();
glActiveTexture(GL_TEXTURE0);
if (!surface->is_tiled) {
// TODO: Ensure this will always be a color format, not a depth or other format
ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride);
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
} else {
SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
// Directly copy pixels. Internal OpenGL color formats are consistent so no conversion
// is necessary.
MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(),
false);
} else {
// Depth/Stencil formats need special treatment since they aren't sampleable using
// LookupTexture and can't use RGBA format
size_t tuple_idx = (size_t)surface->pixel_format - 14;
ASSERT(tuple_idx < depth_format_tuples.size());
const FormatTuple& tuple = depth_format_tuples[tuple_idx];
u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr,
false);
}
}
surface->dirty = false;
cur_state.texture_units[0].texture_2d = old_tex;
cur_state.Apply();
}
void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface,
bool invalidate) {
if (size == 0) {
return;
}
// Gather up unique surfaces that touch the region
std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
std::copy_if(it->second.begin(), it->second.end(),
std::inserter(touching_surfaces, touching_surfaces.end()),
[skip_surface](std::shared_ptr<CachedSurface> surface) {
return (surface.get() != skip_surface);
});
}
// Flush and invalidate surfaces
for (auto surface : touching_surfaces) {
FlushSurface(surface.get());
if (invalidate) {
Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
surface_cache.subtract(
std::make_pair(boost::icl::interval<PAddr>::right_open(
surface->addr, surface->addr + surface->size),
std::set<std::shared_ptr<CachedSurface>>({surface})));
}
}
}
void RasterizerCacheOpenGL::FlushAll() {
for (auto& surfaces : surface_cache) {
for (auto& surface : surfaces.second) {
FlushSurface(surface.get());
}
}
}

View file

@ -1,239 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <set>
#include <tuple>
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#endif
#include <boost/icl/interval_map.hpp>
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/hw/gpu.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace MathUtil {
template <class T>
struct Rectangle;
}
struct CachedSurface;
using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
struct CachedSurface {
enum class PixelFormat {
// First 5 formats are shared between textures and color buffers
RGBA8 = 0,
RGB8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
// Texture-only formats
IA8 = 5,
RG8 = 6,
I8 = 7,
A8 = 8,
IA4 = 9,
I4 = 10,
A4 = 11,
ETC1 = 12,
ETC1A4 = 13,
// Depth buffer-only formats
D16 = 14,
// gap
D24 = 16,
D24S8 = 17,
Invalid = 255,
};
enum class SurfaceType {
Color = 0,
Texture = 1,
Depth = 2,
DepthStencil = 3,
Invalid = 4,
};
static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
static const std::array<unsigned int, 18> bpp_table = {
32, // RGBA8
24, // RGB8
16, // RGB5A1
16, // RGB565
16, // RGBA4
16, // IA8
16, // RG8
8, // I8
8, // A8
8, // IA4
4, // I4
4, // A4
4, // ETC1
8, // ETC1A4
16, // D16
0,
24, // D24
32, // D24S8
};
ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
return bpp_table[(unsigned int)format];
}
static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
}
static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) {
return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
}
static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) {
return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
: PixelFormat::Invalid;
}
static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
switch (format) {
// RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
case GPU::Regs::PixelFormat::RGB565:
return PixelFormat::RGB565;
case GPU::Regs::PixelFormat::RGB5A1:
return PixelFormat::RGB5A1;
default:
return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
}
}
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
(b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
return true;
}
if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
return true;
}
if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
return true;
}
return false;
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
if ((unsigned int)pixel_format < 5) {
return SurfaceType::Color;
}
if ((unsigned int)pixel_format < 14) {
return SurfaceType::Texture;
}
if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
return SurfaceType::Depth;
}
if (pixel_format == PixelFormat::D24S8) {
return SurfaceType::DepthStencil;
}
return SurfaceType::Invalid;
}
u32 GetScaledWidth() const {
return (u32)(width * res_scale_width);
}
u32 GetScaledHeight() const {
return (u32)(height * res_scale_height);
}
PAddr addr;
u32 size;
PAddr min_valid;
PAddr max_valid;
OGLTexture texture;
u32 width;
u32 height;
/// Stride between lines, in pixels. Only valid for images in linear format.
u32 pixel_stride = 0;
float res_scale_width = 1.f;
float res_scale_height = 1.f;
bool is_tiled;
PixelFormat pixel_format;
bool dirty;
};
class RasterizerCacheOpenGL : NonCopyable {
public:
RasterizerCacheOpenGL();
~RasterizerCacheOpenGL();
/// Blits one texture to another
void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type,
const MathUtil::Rectangle<int>& src_rect,
const MathUtil::Rectangle<int>& dst_rect);
/// Attempt to blit one surface's texture to another
bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect,
CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
/// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale,
bool load_if_create);
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to OpenGL and caches it (if not already cached)
CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale,
bool load_if_create, MathUtil::Rectangle<int>& out_rect);
/// Gets a surface based on the texture configuration
CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
/// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer
/// configuration
std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(
const Pica::FramebufferRegs::FramebufferConfig& config);
/// Attempt to get a surface that exactly matches the fill region and format
CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
/// Write the surface back to memory
void FlushSurface(CachedSurface* surface);
/// Write any cached resources overlapping the region back to memory (if dirty) and optionally
/// invalidate them in the cache
void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
/// Flush all cached resources tracked by this cache manager
void FlushAll();
private:
SurfaceCache surface_cache;
OGLFramebuffer transfer_framebuffers[2];
};

File diff suppressed because it is too large Load diff

View file

@ -1,162 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <string>
#include <type_traits>
#include "video_core/regs.h"
namespace GLShader {
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
/**
* This struct contains all state used to generate the GLSL shader program that emulates the current
* Pica register configuration. This struct is used as a cache key for generated GLSL shader
* programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*
* We use a union because "implicitly-defined copy/move constructor for a union X copies the object
* representation of X." and "implicitly-defined copy assignment operator for a union X copies the
* object representation (3.9) of X." = Bytewise copy instead of memberwise copy. This is important
* because the padding bytes are included in the hash and comparison between objects.
*/
union PicaShaderConfig {
/// Construct a PicaShaderConfig with the given Pica register configuration.
static PicaShaderConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
}
bool operator==(const PicaShaderConfig& o) const {
return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
};
// NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC.
// This makes BitField not TC when used in a union or struct so we have to resort
// to this ugly hack.
// Once that bug is fixed we can use Pica::Regs::TevStageConfig here.
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
struct State {
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
Pica::LightingRegs::LightingFresnelSelector fresnel_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
} state;
};
#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value,
"PicaShaderConfig::State must be trivially copyable");
#endif
/**
* Generates the GLSL vertex shader program source code for the current Pica state
* @returns String of the shader source code
*/
std::string GenerateVertexShader();
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @returns String of the shader source code
*/
std::string GenerateFragmentShader(const PicaShaderConfig& config);
} // namespace GLShader
namespace std {
template <>
struct hash<GLShader::PicaShaderConfig> {
size_t operator()(const GLShader::PicaShaderConfig& k) const {
return Common::ComputeHash64(&k.state, sizeof(GLShader::PicaShaderConfig::State));
}
};
} // namespace std

View file

@ -1,235 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
using GLvec2 = std::array<GLfloat, 2>;
using GLvec3 = std::array<GLfloat, 3>;
using GLvec4 = std::array<GLfloat, 4>;
namespace PicaToGL {
inline GLenum TextureFilterMode(Pica::TexturingRegs::TextureConfig::TextureFilter mode) {
static const GLenum filter_mode_table[] = {
GL_NEAREST, // TextureFilter::Nearest
GL_LINEAR, // TextureFilter::Linear
};
// Range check table for input
if (static_cast<size_t>(mode) >= ARRAY_SIZE(filter_mode_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown texture filtering mode %d", mode);
UNREACHABLE();
return GL_LINEAR;
}
GLenum gl_mode = filter_mode_table[mode];
// Check for dummy values indicating an unknown mode
if (gl_mode == 0) {
LOG_CRITICAL(Render_OpenGL, "Unknown texture filtering mode %d", mode);
UNIMPLEMENTED();
return GL_LINEAR;
}
return gl_mode;
}
inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
static const GLenum wrap_mode_table[] = {
GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge
GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder
GL_REPEAT, // WrapMode::Repeat
GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat
// TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
// comments in enum WrapMode.
GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge2
GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder2
GL_REPEAT, // WrapMode::Repeat2
GL_REPEAT, // WrapMode::Repeat3
};
// Range check table for input
if (static_cast<size_t>(mode) >= ARRAY_SIZE(wrap_mode_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown texture wrap mode %d", mode);
UNREACHABLE();
return GL_CLAMP_TO_EDGE;
}
if (static_cast<u32>(mode) > 3) {
Core::Telemetry().AddField(Telemetry::FieldType::Session,
"VideoCore_Pica_UnsupportedTextureWrapMode",
static_cast<u32>(mode));
LOG_WARNING(Render_OpenGL, "Using texture wrap mode %u", static_cast<u32>(mode));
}
GLenum gl_mode = wrap_mode_table[mode];
// Check for dummy values indicating an unknown mode
if (gl_mode == 0) {
LOG_CRITICAL(Render_OpenGL, "Unknown texture wrap mode %d", mode);
UNIMPLEMENTED();
return GL_CLAMP_TO_EDGE;
}
return gl_mode;
}
inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
static const GLenum blend_equation_table[] = {
GL_FUNC_ADD, // BlendEquation::Add
GL_FUNC_SUBTRACT, // BlendEquation::Subtract
GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
GL_MIN, // BlendEquation::Min
GL_MAX, // BlendEquation::Max
};
// Range check table for input
if (static_cast<size_t>(equation) >= ARRAY_SIZE(blend_equation_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation);
UNREACHABLE();
return GL_FUNC_ADD;
}
return blend_equation_table[(unsigned)equation];
}
inline GLenum BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
static const GLenum blend_func_table[] = {
GL_ZERO, // BlendFactor::Zero
GL_ONE, // BlendFactor::One
GL_SRC_COLOR, // BlendFactor::SourceColor
GL_ONE_MINUS_SRC_COLOR, // BlendFactor::OneMinusSourceColor
GL_DST_COLOR, // BlendFactor::DestColor
GL_ONE_MINUS_DST_COLOR, // BlendFactor::OneMinusDestColor
GL_SRC_ALPHA, // BlendFactor::SourceAlpha
GL_ONE_MINUS_SRC_ALPHA, // BlendFactor::OneMinusSourceAlpha
GL_DST_ALPHA, // BlendFactor::DestAlpha
GL_ONE_MINUS_DST_ALPHA, // BlendFactor::OneMinusDestAlpha
GL_CONSTANT_COLOR, // BlendFactor::ConstantColor
GL_ONE_MINUS_CONSTANT_COLOR, // BlendFactor::OneMinusConstantColor
GL_CONSTANT_ALPHA, // BlendFactor::ConstantAlpha
GL_ONE_MINUS_CONSTANT_ALPHA, // BlendFactor::OneMinusConstantAlpha
GL_SRC_ALPHA_SATURATE, // BlendFactor::SourceAlphaSaturate
};
// Range check table for input
if (static_cast<size_t>(factor) >= ARRAY_SIZE(blend_func_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor);
UNREACHABLE();
return GL_ONE;
}
return blend_func_table[(unsigned)factor];
}
inline GLenum LogicOp(Pica::FramebufferRegs::LogicOp op) {
static const GLenum logic_op_table[] = {
GL_CLEAR, // Clear
GL_AND, // And
GL_AND_REVERSE, // AndReverse
GL_COPY, // Copy
GL_SET, // Set
GL_COPY_INVERTED, // CopyInverted
GL_NOOP, // NoOp
GL_INVERT, // Invert
GL_NAND, // Nand
GL_OR, // Or
GL_NOR, // Nor
GL_XOR, // Xor
GL_EQUIV, // Equiv
GL_AND_INVERTED, // AndInverted
GL_OR_REVERSE, // OrReverse
GL_OR_INVERTED, // OrInverted
};
// Range check table for input
if (static_cast<size_t>(op) >= ARRAY_SIZE(logic_op_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown logic op %d", op);
UNREACHABLE();
return GL_COPY;
}
return logic_op_table[(unsigned)op];
}
inline GLenum CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
static const GLenum compare_func_table[] = {
GL_NEVER, // CompareFunc::Never
GL_ALWAYS, // CompareFunc::Always
GL_EQUAL, // CompareFunc::Equal
GL_NOTEQUAL, // CompareFunc::NotEqual
GL_LESS, // CompareFunc::LessThan
GL_LEQUAL, // CompareFunc::LessThanOrEqual
GL_GREATER, // CompareFunc::GreaterThan
GL_GEQUAL, // CompareFunc::GreaterThanOrEqual
};
// Range check table for input
if (static_cast<size_t>(func) >= ARRAY_SIZE(compare_func_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func);
UNREACHABLE();
return GL_ALWAYS;
}
return compare_func_table[(unsigned)func];
}
inline GLenum StencilOp(Pica::FramebufferRegs::StencilAction action) {
static const GLenum stencil_op_table[] = {
GL_KEEP, // StencilAction::Keep
GL_ZERO, // StencilAction::Zero
GL_REPLACE, // StencilAction::Replace
GL_INCR, // StencilAction::Increment
GL_DECR, // StencilAction::Decrement
GL_INVERT, // StencilAction::Invert
GL_INCR_WRAP, // StencilAction::IncrementWrap
GL_DECR_WRAP, // StencilAction::DecrementWrap
};
// Range check table for input
if (static_cast<size_t>(action) >= ARRAY_SIZE(stencil_op_table)) {
LOG_CRITICAL(Render_OpenGL, "Unknown stencil op %d", action);
UNREACHABLE();
return GL_KEEP;
}
return stencil_op_table[(unsigned)action];
}
inline GLvec4 ColorRGBA8(const u32 color) {
return {{
(color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f,
(color >> 24 & 0xFF) / 255.0f,
}};
}
inline std::array<GLfloat, 3> LightColor(const Pica::LightingRegs::LightColor& color) {
return {{
color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
}};
}
} // namespace

View file

@ -13,14 +13,11 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "core/memory.h"
#include "core/settings.h"
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/video_core.h"
@ -128,10 +125,6 @@ void RendererOpenGL::SwapBuffers(const FramebufferInfo& framebuffer_info) {
prev_state.Apply();
RefreshRasterizerSetting();
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
Pica::g_debug_context->recorder->FrameFinished();
}
}
static inline u32 MortonInterleave128(u32 x, u32 y) {

View file

@ -8,7 +8,6 @@
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"