Merge branch 'yuzu-emu:master' into convert_legacy
This commit is contained in:
commit
e49184e606
360 changed files with 43056 additions and 27212 deletions
|
@ -3,6 +3,7 @@ add_subdirectory(host_shaders)
|
|||
if(LIBVA_FOUND)
|
||||
set_source_files_properties(command_classes/codecs/codec.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
|
||||
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
|
||||
endif()
|
||||
|
||||
add_library(video_core STATIC
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "common/assert.h"
|
||||
|
@ -15,12 +17,28 @@
|
|||
|
||||
extern "C" {
|
||||
#include <libavutil/opt.h>
|
||||
#ifdef LIBVA_FOUND
|
||||
// for querying VAAPI driver information
|
||||
#include <libavutil/hwcontext_vaapi.h>
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
namespace {
|
||||
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
|
||||
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
|
||||
constexpr std::array PREFERRED_GPU_DECODERS = {
|
||||
AV_HWDEVICE_TYPE_CUDA,
|
||||
#ifdef _WIN32
|
||||
AV_HWDEVICE_TYPE_D3D11VA,
|
||||
AV_HWDEVICE_TYPE_DXVA2,
|
||||
#elif defined(__linux__)
|
||||
AV_HWDEVICE_TYPE_VAAPI,
|
||||
AV_HWDEVICE_TYPE_VDPAU,
|
||||
#endif
|
||||
// last resort for Linux Flatpak (w/ NVIDIA)
|
||||
AV_HWDEVICE_TYPE_VULKAN,
|
||||
};
|
||||
|
||||
void AVPacketDeleter(AVPacket* ptr) {
|
||||
av_packet_free(&ptr);
|
||||
|
@ -59,46 +77,50 @@ Codec::~Codec() {
|
|||
av_buffer_unref(&av_gpu_decoder);
|
||||
}
|
||||
|
||||
// List all the currently available hwcontext in ffmpeg
|
||||
static std::vector<AVHWDeviceType> ListSupportedContexts() {
|
||||
std::vector<AVHWDeviceType> contexts{};
|
||||
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
|
||||
do {
|
||||
current_device_type = av_hwdevice_iterate_types(current_device_type);
|
||||
contexts.push_back(current_device_type);
|
||||
} while (current_device_type != AV_HWDEVICE_TYPE_NONE);
|
||||
return contexts;
|
||||
}
|
||||
|
||||
bool Codec::CreateGpuAvDevice() {
|
||||
#if defined(LIBVA_FOUND)
|
||||
static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
|
||||
"i915",
|
||||
"iHD",
|
||||
"amdgpu",
|
||||
};
|
||||
AVDictionary* hwdevice_options = nullptr;
|
||||
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
|
||||
for (const auto& driver : VAAPI_DRIVERS) {
|
||||
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
|
||||
const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
|
||||
nullptr, hwdevice_options, 0);
|
||||
if (hwdevice_error >= 0) {
|
||||
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
|
||||
av_dict_free(&hwdevice_options);
|
||||
av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
|
||||
return true;
|
||||
}
|
||||
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
|
||||
}
|
||||
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
|
||||
av_dict_free(&hwdevice_options);
|
||||
#endif
|
||||
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
|
||||
static constexpr std::array GPU_DECODER_TYPES{
|
||||
AV_HWDEVICE_TYPE_CUDA,
|
||||
#ifdef _WIN32
|
||||
AV_HWDEVICE_TYPE_D3D11VA,
|
||||
#else
|
||||
AV_HWDEVICE_TYPE_VDPAU,
|
||||
#endif
|
||||
};
|
||||
for (const auto& type : GPU_DECODER_TYPES) {
|
||||
static const auto supported_contexts = ListSupportedContexts();
|
||||
for (const auto& type : PREFERRED_GPU_DECODERS) {
|
||||
if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
|
||||
[&type](const auto& context) { return context == type; })) {
|
||||
LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
|
||||
continue;
|
||||
}
|
||||
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
|
||||
if (hwdevice_res < 0) {
|
||||
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
|
||||
av_hwdevice_get_type_name(type), hwdevice_res);
|
||||
continue;
|
||||
}
|
||||
#ifdef LIBVA_FOUND
|
||||
if (type == AV_HWDEVICE_TYPE_VAAPI) {
|
||||
// we need to determine if this is an impersonated VAAPI driver
|
||||
AVHWDeviceContext* hwctx =
|
||||
static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
|
||||
AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
|
||||
const char* vendor_name = vaQueryVendorString(vactx->display);
|
||||
if (strstr(vendor_name, "VDPAU backend")) {
|
||||
// VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
|
||||
LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
|
||||
continue;
|
||||
} else {
|
||||
// according to some user testing, certain vaapi driver (Intel?) could be buggy
|
||||
// so let's log the driver name which may help the developers/supporters
|
||||
LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (int i = 0;; i++) {
|
||||
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
|
||||
if (!config) {
|
||||
|
|
|
@ -185,16 +185,6 @@ struct GPU::Impl {
|
|||
return *dma_pusher;
|
||||
}
|
||||
|
||||
/// Returns a reference to the GPU CDMA pusher.
|
||||
[[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
|
||||
return *cdma_pusher;
|
||||
}
|
||||
|
||||
/// Returns a const reference to the GPU CDMA pusher.
|
||||
[[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
|
||||
return *cdma_pusher;
|
||||
}
|
||||
|
||||
/// Returns a reference to the underlying renderer.
|
||||
[[nodiscard]] VideoCore::RendererBase& Renderer() {
|
||||
return *renderer;
|
||||
|
@ -338,25 +328,27 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
/// Push GPU command buffer entries to be processed
|
||||
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
|
||||
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
|
||||
if (!use_nvdec) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cdma_pusher) {
|
||||
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
|
||||
if (!cdma_pushers.contains(id)) {
|
||||
cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu));
|
||||
}
|
||||
|
||||
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
|
||||
// TODO(ameerj): RE proper async nvdec operation
|
||||
// gpu_thread.SubmitCommandBuffer(std::move(entries));
|
||||
|
||||
cdma_pusher->ProcessEntries(std::move(entries));
|
||||
cdma_pushers[id]->ProcessEntries(std::move(entries));
|
||||
}
|
||||
|
||||
/// Frees the CDMAPusher instance to free up resources
|
||||
void ClearCdmaInstance() {
|
||||
cdma_pusher.reset();
|
||||
void ClearCdmaInstance(u32 id) {
|
||||
const auto iter = cdma_pushers.find(id);
|
||||
if (iter != cdma_pushers.end()) {
|
||||
cdma_pushers.erase(iter);
|
||||
}
|
||||
}
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
|
@ -659,7 +651,7 @@ struct GPU::Impl {
|
|||
Core::System& system;
|
||||
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
||||
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
|
||||
std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
|
||||
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
|
||||
std::unique_ptr<VideoCore::RendererBase> renderer;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
const bool use_nvdec;
|
||||
|
@ -811,14 +803,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const {
|
|||
return impl->DmaPusher();
|
||||
}
|
||||
|
||||
Tegra::CDmaPusher& GPU::CDmaPusher() {
|
||||
return impl->CDmaPusher();
|
||||
}
|
||||
|
||||
const Tegra::CDmaPusher& GPU::CDmaPusher() const {
|
||||
return impl->CDmaPusher();
|
||||
}
|
||||
|
||||
VideoCore::RendererBase& GPU::Renderer() {
|
||||
return impl->Renderer();
|
||||
}
|
||||
|
@ -887,12 +871,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
|
|||
impl->PushGPUEntries(std::move(entries));
|
||||
}
|
||||
|
||||
void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
|
||||
impl->PushCommandBuffer(entries);
|
||||
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
|
||||
impl->PushCommandBuffer(id, entries);
|
||||
}
|
||||
|
||||
void GPU::ClearCdmaInstance() {
|
||||
impl->ClearCdmaInstance();
|
||||
void GPU::ClearCdmaInstance(u32 id) {
|
||||
impl->ClearCdmaInstance(id);
|
||||
}
|
||||
|
||||
void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
|
|
|
@ -83,6 +83,7 @@ enum class DepthFormat : u32 {
|
|||
S8_UINT_Z24_UNORM = 0x14,
|
||||
D24X8_UNORM = 0x15,
|
||||
D24S8_UNORM = 0x16,
|
||||
S8_UINT = 0x17,
|
||||
D24C8_UNORM = 0x18,
|
||||
D32_FLOAT_S8X24_UINT = 0x19,
|
||||
};
|
||||
|
@ -241,10 +242,10 @@ public:
|
|||
void PushGPUEntries(Tegra::CommandList&& entries);
|
||||
|
||||
/// Push GPU command buffer entries to be processed
|
||||
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
|
||||
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
|
||||
|
||||
/// Frees the CDMAPusher instance to free up resources
|
||||
void ClearCdmaInstance();
|
||||
void ClearCdmaInstance(u32 id);
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
|
||||
|
|
|
@ -10,6 +10,8 @@ set(SHADER_FILES
|
|||
astc_decoder.comp
|
||||
block_linear_unswizzle_2d.comp
|
||||
block_linear_unswizzle_3d.comp
|
||||
convert_abgr8_to_d24s8.frag
|
||||
convert_d24s8_to_abgr8.frag
|
||||
convert_depth_to_float.frag
|
||||
convert_float_to_depth.frag
|
||||
full_screen_triangle.vert
|
||||
|
|
18
src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
Normal file
18
src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
Normal file
|
@ -0,0 +1,18 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_shader_stencil_export : require
|
||||
|
||||
layout(binding = 0) uniform sampler2D color_texture;
|
||||
|
||||
// Packs the four 8-bit channels of an ABGR8 texel into one 32-bit word and
// writes it out as a 24-bit depth value plus an 8-bit stencil reference.
void main() {
    ivec2 texel_pos = ivec2(gl_FragCoord.xy);
    vec4 texel = texelFetch(color_texture, texel_pos, 0);

    // Scale each normalized channel to the 0..255 range, then move the swizzled
    // .abgr components to bit offsets 24, 16, 8 and 0 respectively.
    uvec4 channels = uvec4(texel.abgr * (exp2(8) - 1.0f));
    uvec4 shifted = channels << uvec4(24, 16, 8, 0);
    uint depth_stencil_bits = shifted.x | shifted.y | shifted.z | shifted.w;

    // The low 24 bits are normalized into the depth output; the top 8 bits
    // become the stencil reference.
    gl_FragDepth = float(depth_stencil_bits & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
    gl_FragStencilRefARB = int(depth_stencil_bits >> 24);
}
|
23
src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
Normal file
23
src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
Normal file
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D depth_tex;
|
||||
layout(binding = 1) uniform isampler2D stencil_tex;
|
||||
|
||||
layout(location = 0) out vec4 color;
|
||||
|
||||
// Splits a depth/stencil texel into four 8-bit channels and writes them to the
// ABGR8 colour output: alpha receives the stencil value, and blue/green/red
// receive bits 31..24, 23..16 and 15..8 of the depth scaled to 32 bits.
void main() {
    // NOTE(review): coord is an integer pixel position handed to textureLod, so
    // this presumably relies on the bound sampler using unnormalized
    // coordinates - confirm against the sampler creation code.
    ivec2 coord = ivec2(gl_FragCoord.xy);

    // NOTE(review): for a depth sample of exactly 1.0 the product may reach 2^32,
    // which is outside the uint range - confirm the target drivers saturate.
    highp uint depth_val =
        uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
    lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
    highp uvec4 components =
        uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
    // Normalize every 8-bit component back into the 0..1 range of the output.
    color.abgr = vec4(components) / (exp2(8.0) - 1.0);
}
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/bit_util.h"
|
||||
#include "common/literals.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
|
@ -148,6 +149,8 @@ GLenum AttachmentType(PixelFormat format) {
|
|||
switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
|
||||
case SurfaceType::Depth:
|
||||
return GL_DEPTH_ATTACHMENT;
|
||||
case SurfaceType::Stencil:
|
||||
return GL_STENCIL_ATTACHMENT;
|
||||
case SurfaceType::DepthStencil:
|
||||
return GL_DEPTH_STENCIL_ATTACHMENT;
|
||||
default:
|
||||
|
@ -317,13 +320,12 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
|||
}
|
||||
}
|
||||
|
||||
OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) {
|
||||
OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format,
|
||||
GLsizei gl_num_levels) {
|
||||
const GLenum target = ImageTarget(info);
|
||||
const GLsizei width = info.size.width;
|
||||
const GLsizei height = info.size.height;
|
||||
const GLsizei depth = info.size.depth;
|
||||
const int max_host_mip_levels = std::bit_width(info.size.width);
|
||||
const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
|
||||
const GLsizei num_layers = info.resources.layers;
|
||||
const GLsizei num_samples = info.num_samples;
|
||||
|
||||
|
@ -335,10 +337,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
|
|||
}
|
||||
switch (target) {
|
||||
case GL_TEXTURE_1D_ARRAY:
|
||||
glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
|
||||
glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, num_layers);
|
||||
break;
|
||||
case GL_TEXTURE_2D_ARRAY:
|
||||
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
|
||||
glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, num_layers);
|
||||
break;
|
||||
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
|
||||
// TODO: Where should 'fixedsamplelocations' come from?
|
||||
|
@ -348,10 +350,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
|
|||
break;
|
||||
}
|
||||
case GL_TEXTURE_RECTANGLE:
|
||||
glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
|
||||
glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, height);
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
|
||||
glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, depth);
|
||||
break;
|
||||
case GL_TEXTURE_BUFFER:
|
||||
UNREACHABLE();
|
||||
|
@ -397,9 +399,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
|
|||
return GL_R32UI;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NextPow2(u32 value) {
|
||||
return 1U << (32U - std::countl_zero(value - 1U));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
ImageBufferMap::~ImageBufferMap() {
|
||||
|
@ -526,8 +525,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
|
|||
}
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::ConvertImage(Image& dst, Image& src,
|
||||
std::span<const VideoCommon::ImageCopy> copies) {
|
||||
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
|
||||
std::span<const VideoCommon::ImageCopy> copies) {
|
||||
LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
|
||||
format_conversion_pass.ConvertImage(dst, src, copies);
|
||||
}
|
||||
|
@ -696,7 +695,9 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
|
|||
gl_format = tuple.format;
|
||||
gl_type = tuple.type;
|
||||
}
|
||||
texture = MakeImage(info, gl_internal_format);
|
||||
const int max_host_mip_levels = std::bit_width(info.size.width);
|
||||
gl_num_levels = std::min(info.resources.levels, max_host_mip_levels);
|
||||
texture = MakeImage(info, gl_internal_format, gl_num_levels);
|
||||
current_texture = texture.handle;
|
||||
if (runtime->device.HasDebuggingToolAttached()) {
|
||||
const std::string name = VideoCommon::Name(*this);
|
||||
|
@ -724,6 +725,9 @@ void Image::UploadMemory(const ImageBufferMap& map,
|
|||
u32 current_image_height = std::numeric_limits<u32>::max();
|
||||
|
||||
for (const VideoCommon::BufferImageCopy& copy : copies) {
|
||||
if (copy.image_subresource.base_level >= gl_num_levels) {
|
||||
continue;
|
||||
}
|
||||
if (current_row_length != copy.buffer_row_length) {
|
||||
current_row_length = copy.buffer_row_length;
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
|
||||
|
@ -753,6 +757,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
|
|||
u32 current_image_height = std::numeric_limits<u32>::max();
|
||||
|
||||
for (const VideoCommon::BufferImageCopy& copy : copies) {
|
||||
if (copy.image_subresource.base_level >= gl_num_levels) {
|
||||
continue;
|
||||
}
|
||||
if (current_row_length != copy.buffer_row_length) {
|
||||
current_row_length = copy.buffer_row_length;
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
|
||||
|
@ -792,7 +799,7 @@ GLuint Image::StorageHandle() noexcept {
|
|||
}
|
||||
store_view.Create();
|
||||
glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0,
|
||||
info.resources.levels, 0, info.resources.layers);
|
||||
gl_num_levels, 0, info.resources.layers);
|
||||
return store_view.handle;
|
||||
default:
|
||||
return current_texture;
|
||||
|
@ -907,6 +914,8 @@ void Image::Scale(bool up_scale) {
|
|||
return GL_COLOR_ATTACHMENT0;
|
||||
case SurfaceType::Depth:
|
||||
return GL_DEPTH_ATTACHMENT;
|
||||
case SurfaceType::Stencil:
|
||||
return GL_STENCIL_ATTACHMENT;
|
||||
case SurfaceType::DepthStencil:
|
||||
return GL_DEPTH_STENCIL_ATTACHMENT;
|
||||
default:
|
||||
|
@ -920,8 +929,10 @@ void Image::Scale(bool up_scale) {
|
|||
return GL_COLOR_BUFFER_BIT;
|
||||
case SurfaceType::Depth:
|
||||
return GL_DEPTH_BUFFER_BIT;
|
||||
case SurfaceType::Stencil:
|
||||
return GL_STENCIL_BUFFER_BIT;
|
||||
case SurfaceType::DepthStencil:
|
||||
return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT;
|
||||
return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return GL_COLOR_BUFFER_BIT;
|
||||
|
@ -933,8 +944,10 @@ void Image::Scale(bool up_scale) {
|
|||
return 0;
|
||||
case SurfaceType::Depth:
|
||||
return 1;
|
||||
case SurfaceType::DepthStencil:
|
||||
case SurfaceType::Stencil:
|
||||
return 2;
|
||||
case SurfaceType::DepthStencil:
|
||||
return 3;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return 0;
|
||||
|
@ -956,7 +969,7 @@ void Image::Scale(bool up_scale) {
|
|||
auto dst_info = info;
|
||||
dst_info.size.width = scaled_width;
|
||||
dst_info.size.height = scaled_height;
|
||||
upscaled_backup = MakeImage(dst_info, gl_internal_format);
|
||||
upscaled_backup = MakeImage(dst_info, gl_internal_format, gl_num_levels);
|
||||
}
|
||||
const u32 src_width = up_scale ? original_width : scaled_width;
|
||||
const u32 src_height = up_scale ? original_height : scaled_height;
|
||||
|
@ -1264,10 +1277,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||
}
|
||||
|
||||
if (const ImageView* const image_view = depth_buffer; image_view) {
|
||||
if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
|
||||
buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||
} else {
|
||||
switch (GetFormatType(image_view->format)) {
|
||||
case SurfaceType::Depth:
|
||||
buffer_bits |= GL_DEPTH_BUFFER_BIT;
|
||||
break;
|
||||
case SurfaceType::Stencil:
|
||||
buffer_bits |= GL_STENCIL_BUFFER_BIT;
|
||||
break;
|
||||
case SurfaceType::DepthStencil:
|
||||
buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
buffer_bits |= GL_DEPTH_BUFFER_BIT;
|
||||
break;
|
||||
}
|
||||
const GLenum attachment = AttachmentType(image_view->format);
|
||||
AttachTexture(handle, attachment, image_view);
|
||||
|
@ -1308,7 +1331,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
|
|||
const u32 copy_size = region.width * region.height * region.depth * img_bpp;
|
||||
if (pbo_size < copy_size) {
|
||||
intermediate_pbo.Create();
|
||||
pbo_size = NextPow2(copy_size);
|
||||
pbo_size = Common::NextPow2(copy_size);
|
||||
glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
// Copy from source to PBO
|
||||
|
|
|
@ -84,9 +84,13 @@ public:
|
|||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
/// Always returns true; neither image is inspected.
/// NOTE(review): presumably consulted by the shared texture cache before it
/// calls ReinterpretImage - confirm against the caller.
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
    return true;
}
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
|
||||
UNIMPLEMENTED();
|
||||
|
@ -164,8 +168,8 @@ private:
|
|||
|
||||
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
|
||||
|
||||
std::array<OGLFramebuffer, 3> rescale_draw_fbos;
|
||||
std::array<OGLFramebuffer, 3> rescale_read_fbos;
|
||||
std::array<OGLFramebuffer, 4> rescale_draw_fbos;
|
||||
std::array<OGLFramebuffer, 4> rescale_read_fbos;
|
||||
const Settings::ResolutionScalingInfo& resolution;
|
||||
};
|
||||
|
||||
|
@ -221,6 +225,7 @@ private:
|
|||
GLenum gl_internal_format = GL_NONE;
|
||||
GLenum gl_format = GL_NONE;
|
||||
GLenum gl_type = GL_NONE;
|
||||
GLsizei gl_num_levels{};
|
||||
TextureCacheRuntime* runtime{};
|
||||
GLuint current_texture{};
|
||||
};
|
||||
|
@ -338,7 +343,6 @@ struct TextureCacheParams {
|
|||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true;
|
||||
|
||||
using Runtime = OpenGL::TextureCacheRuntime;
|
||||
using Image = OpenGL::Image;
|
||||
|
|
|
@ -108,6 +108,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
|
|||
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
|
||||
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
|
||||
{GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
|
||||
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
#include <algorithm>
|
||||
|
||||
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
|
||||
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
|
||||
|
@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
|
|||
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
|
||||
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
|
||||
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
|
||||
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
|
||||
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
|
||||
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
|
||||
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
|
||||
if (device.IsExtShaderStencilExportSupported()) {
|
||||
|
@ -448,6 +452,22 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
|
|||
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
|
||||
ImageView& src_image_view, u32 up_scale, u32 down_shift) {
|
||||
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
|
||||
convert_abgr8_to_d24s8_frag, true);
|
||||
ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale,
|
||||
down_shift);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
|
||||
ImageView& src_image_view, u32 up_scale, u32 down_shift) {
|
||||
ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
|
||||
convert_d24s8_to_abgr8_frag, false);
|
||||
ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale,
|
||||
down_shift);
|
||||
}
|
||||
|
||||
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
|
||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||
|
@ -495,6 +515,101 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
|
|||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
/// Records a conversion draw that samples the colour view of `src_image_view`
/// through the nearest sampler and renders into `dst_framebuffer` with `pipeline`.
/// @param pipeline        Graphics pipeline to bind; must use the one-texture layout.
/// @param dst_framebuffer Framebuffer whose render pass is requested for the draw.
/// @param src_image_view  Source view; its colour view and size are read.
/// @param up_scale        Multiplier applied to the source size.
/// @param down_shift      Right shift applied to the scaled source size.
void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                                   ImageView& src_image_view, u32 up_scale, u32 down_shift) {
    const VkPipelineLayout layout = *one_texture_pipeline_layout;
    const VkImageView src_view = src_image_view.ColorView();
    const VkSampler sampler = *nearest_sampler;
    // Render area: source size scaled by up_scale / 2^down_shift, never below 1x1.
    const VkExtent2D extent{
        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
    };
    scheduler.RequestRenderpass(dst_framebuffer);
    // up_scale and down_shift are already folded into `extent`, so the recorded
    // command only captures what it actually reads.
    scheduler.Record([pipeline, layout, sampler, src_view, extent,
                      this](vk::CommandBuffer cmdbuf) {
        const float width = static_cast<float>(extent.width);
        const float height = static_cast<float>(extent.height);
        const VkViewport viewport{
            .x = 0.0f,
            .y = 0.0f,
            .width = width,
            .height = height,
            .minDepth = 0.0f,
            .maxDepth = 0.0f,
        };
        const VkRect2D scissor{
            .offset = VkOffset2D{.x = 0, .y = 0},
            .extent = extent,
        };
        const PushConstants push_constants{
            .tex_scale = {width, height},
            .tex_offset = {0.0f, 0.0f},
        };
        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
        UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);

        // TODO: Barriers
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
                                  nullptr);
        cmdbuf.SetViewport(0, viewport);
        cmdbuf.SetScissor(0, scissor);
        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
        // Three vertices, one instance.
        cmdbuf.Draw(3, 1, 0, 0);
    });
    scheduler.InvalidateState();
}
|
||||
|
||||
/// Records a conversion draw that samples the depth and stencil views of
/// `src_image_view` through the nearest sampler and renders into `dst_framebuffer`
/// with `pipeline`.
/// @param pipeline        Graphics pipeline to bind; must use the two-texture layout.
/// @param dst_framebuffer Framebuffer whose render pass is requested for the draw.
/// @param src_image_view  Source view; its depth view, stencil view and size are read.
/// @param up_scale        Multiplier applied to the source size.
/// @param down_shift      Right shift applied to the scaled source size.
void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                                          ImageView& src_image_view, u32 up_scale, u32 down_shift) {
    const VkPipelineLayout layout = *two_textures_pipeline_layout;
    const VkImageView src_depth_view = src_image_view.DepthView();
    const VkImageView src_stencil_view = src_image_view.StencilView();
    const VkSampler sampler = *nearest_sampler;
    // Render area: source size scaled by up_scale / 2^down_shift, never below 1x1.
    const VkExtent2D extent{
        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
    };
    scheduler.RequestRenderpass(dst_framebuffer);
    // up_scale and down_shift are already folded into `extent`, so the recorded
    // command only captures what it actually reads.
    scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent,
                      this](vk::CommandBuffer cmdbuf) {
        const float width = static_cast<float>(extent.width);
        const float height = static_cast<float>(extent.height);
        const VkViewport viewport{
            .x = 0.0f,
            .y = 0.0f,
            .width = width,
            .height = height,
            .minDepth = 0.0f,
            .maxDepth = 0.0f,
        };
        const VkRect2D scissor{
            .offset = VkOffset2D{.x = 0, .y = 0},
            .extent = extent,
        };
        const PushConstants push_constants{
            .tex_scale = {width, height},
            .tex_offset = {0.0f, 0.0f},
        };
        const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
        UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
                                       src_stencil_view);
        // TODO: Barriers
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
                                  nullptr);
        cmdbuf.SetViewport(0, viewport);
        cmdbuf.SetScissor(0, scissor);
        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
        // Three vertices, one instance.
        cmdbuf.Draw(3, 1, 0, 0);
    });
    scheduler.InvalidateState();
}
|
||||
|
||||
VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
|
||||
const auto it = std::ranges::find(blit_color_keys, key);
|
||||
if (it != blit_color_keys.end()) {
|
||||
|
@ -636,4 +751,44 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend
|
|||
});
|
||||
}
|
||||
|
||||
/// Creates `pipeline` on first use; does nothing when it already holds a pipeline.
/// @param pipeline        Pipeline slot to fill.
/// @param renderpass      Render pass the pipeline is created for (subpass 0).
/// @param module          Fragment shader module paired with the full-screen vertex shader.
/// @param is_target_depth When true the depth/stencil state is attached; otherwise it is null.
/// @param single_texture  Selects the one-texture layout (true) or the two-texture layout (false).
void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
                                        vk::ShaderModule& module, bool is_target_depth,
                                        bool single_texture) {
    if (pipeline) {
        // Already built by an earlier call.
        return;
    }
    const std::array stages = MakeStages(*full_screen_vert, *module);
    const auto* const depth_stencil_state =
        is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr;
    const VkPipelineLayout pipeline_layout =
        single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout;
    pipeline = device.GetLogical().CreateGraphicsPipeline({
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stageCount = static_cast<u32>(stages.size()),
        .pStages = stages.data(),
        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pTessellationState = nullptr,
        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pDepthStencilState = depth_stencil_state,
        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .layout = pipeline_layout,
        .renderPass = renderpass,
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
        .basePipelineIndex = 0,
    });
}
|
||||
|
||||
/// Forwards to ConvertPipelineEx with the depth-target flag cleared.
void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
                                                   vk::ShaderModule& module, bool single_texture) {
    constexpr bool is_target_depth = false;
    ConvertPipelineEx(pipeline, renderpass, module, is_target_depth, single_texture);
}
|
||||
|
||||
/// Forwards to ConvertPipelineEx with the depth-target flag set.
void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
                                                   vk::ShaderModule& module, bool single_texture) {
    constexpr bool is_target_depth = true;
    ConvertPipelineEx(pipeline, renderpass, module, is_target_depth, single_texture);
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -56,10 +56,22 @@ public:
|
|||
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||
u32 up_scale, u32 down_shift);
|
||||
|
||||
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
|
||||
u32 up_scale, u32 down_shift);
|
||||
|
||||
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
|
||||
u32 up_scale, u32 down_shift);
|
||||
|
||||
private:
|
||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view, u32 up_scale, u32 down_shift);
|
||||
|
||||
void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
ImageView& src_image_view, u32 up_scale, u32 down_shift);
|
||||
|
||||
void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
ImageView& src_image_view, u32 up_scale, u32 down_shift);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
|
||||
|
@ -68,6 +80,15 @@ private:
|
|||
|
||||
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
/// Creates the conversion pipeline into `pipeline` unless it already holds a handle.
/// `is_target_depth` attaches the depth/stencil state; `single_texture` selects the one-texture
/// pipeline layout instead of the two-texture one.
void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module, bool is_target_depth, bool single_texture);
|
||||
|
||||
/// Calls ConvertPipelineEx with `is_target_depth` set to false.
void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module, bool single_texture);
|
||||
|
||||
/// Calls ConvertPipelineEx with `is_target_depth` set to true.
void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module, bool single_texture);
|
||||
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
StateTracker& state_tracker;
|
||||
|
@ -83,6 +104,8 @@ private:
|
|||
vk::ShaderModule blit_depth_stencil_frag;
|
||||
vk::ShaderModule convert_depth_to_float_frag;
|
||||
vk::ShaderModule convert_float_to_depth_frag;
|
||||
vk::ShaderModule convert_abgr8_to_d24s8_frag;
|
||||
vk::ShaderModule convert_d24s8_to_abgr8_frag;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
|
||||
|
@ -94,6 +117,8 @@ private:
|
|||
vk::Pipeline convert_r32_to_d32_pipeline;
|
||||
vk::Pipeline convert_d16_to_r16_pipeline;
|
||||
vk::Pipeline convert_r16_to_d16_pipeline;
|
||||
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
|
||||
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -162,7 +162,7 @@ struct FormatTuple {
|
|||
{VK_FORMAT_UNDEFINED}, // R16_SINT
|
||||
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
|
||||
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
|
||||
{VK_FORMAT_UNDEFINED}, // R16G16_UINT
|
||||
{VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT
|
||||
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
|
||||
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
|
||||
{VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
|
||||
|
@ -176,8 +176,8 @@ struct FormatTuple {
|
|||
{VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT
|
||||
{VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT
|
||||
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
|
||||
{VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM
|
||||
{VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM
|
||||
{VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM
|
||||
{VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM
|
||||
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
|
||||
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
|
||||
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
|
||||
|
@ -208,6 +208,9 @@ struct FormatTuple {
|
|||
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
|
||||
{VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
|
||||
|
||||
// Stencil formats
|
||||
{VK_FORMAT_S8_UINT, Attachable}, // S8_UINT
|
||||
|
||||
// DepthStencil formats
|
||||
{VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT
|
||||
{VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/settings.h"
|
||||
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
|
@ -102,6 +103,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
|||
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
break;
|
||||
case VideoCore::Surface::SurfaceType::Depth:
|
||||
case VideoCore::Surface::SurfaceType::Stencil:
|
||||
case VideoCore::Surface::SurfaceType::DepthStencil:
|
||||
usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
break;
|
||||
|
@ -173,6 +175,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
|||
return VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
case VideoCore::Surface::SurfaceType::Depth:
|
||||
return VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
case VideoCore::Surface::SurfaceType::Stencil:
|
||||
return VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
case VideoCore::Surface::SurfaceType::DepthStencil:
|
||||
return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
default:
|
||||
|
@ -195,6 +199,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
|||
case PixelFormat::D16_UNORM:
|
||||
case PixelFormat::D32_FLOAT:
|
||||
return VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
case PixelFormat::S8_UINT:
|
||||
return VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
default:
|
||||
return VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
}
|
||||
|
@ -308,6 +314,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
|||
};
|
||||
}
|
||||
|
||||
/// Builds the buffer<->image copy region for one side of an image copy.
///
/// @param copy        Image copy description to translate.
/// @param is_src      Picks the source subresource/offset when true, the destination otherwise.
/// @param aspect_mask Aspect mask forwarded to MakeImageSubresourceLayers.
/// @return Region with a zero buffer offset, zero row length and zero image height; the image
///         extent is taken from `copy.extent`.
[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src,
                                                    VkImageAspectFlags aspect_mask) noexcept {
    const auto& subresource = is_src ? copy.src_subresource : copy.dst_subresource;
    const auto& offset = is_src ? copy.src_offset : copy.dst_offset;
    return VkBufferImageCopy{
        .bufferOffset = 0,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource = MakeImageSubresourceLayers(subresource, aspect_mask),
        .imageOffset = MakeOffset3D(offset),
        .imageExtent = MakeExtent3D(copy.extent),
    };
}
|
||||
|
||||
[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
|
||||
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
|
||||
std::vector<VkBufferCopy> result(copies.size());
|
||||
|
@ -754,6 +773,173 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
|||
return staging_buffer_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
/// Decides whether a copy between `dst` and `src` has to take the ReinterpretImage path.
///
/// Returns true when either image has a depth-stencil format while the device does not support
/// the shader stencil export extension, or when either image is D32_FLOAT_S8_UINT.
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
    const PixelFormat dst_format = dst.info.format;
    const PixelFormat src_format = src.info.format;

    // The device query is only reached for depth-stencil formats, as with the separate checks.
    const auto lacks_stencil_export = [this](PixelFormat format) {
        return VideoCore::Surface::GetFormatType(format) ==
                   VideoCore::Surface::SurfaceType::DepthStencil &&
               !device.IsExtShaderStencilExportSupported();
    };

    return lacks_stencil_export(dst_format) || lacks_stencil_export(src_format) ||
           dst_format == PixelFormat::D32_FLOAT_S8_UINT ||
           src_format == PixelFormat::D32_FLOAT_S8_UINT;
}
|
||||
|
||||
/// Returns a cached scratch buffer able to hold at least `needed_size` bytes.
///
/// Buffers are kept in slots indexed by `level`, derived from the bit width of the request. A
/// slot is created on first use with Common::NextPow2 of the request as its size, committed as
/// device-local memory, and returned directly on later requests that map to the same slot.
/// A request of zero bytes is served as a one-byte request.
VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
    // Treat an empty request as one byte. With `needed_size == 0` the subtraction below wraps
    // around, countl_zero yields 0 and `level` becomes 8 * sizeof(size_t), which is one past the
    // end of `buffers` / `buffer_commits`; it would also ask Vulkan for a zero-sized buffer.
    const size_t request_size = needed_size != 0 ? needed_size : size_t{1};
    // NOTE(review): requests above half the size_t range still map past the last slot; no valid
    // allocation of that size is expected to reach this point — confirm.
    const auto level = (8 * sizeof(size_t)) - std::countl_zero(request_size - 1ULL);
    if (buffer_commits[level]) {
        // The slot was populated by an earlier request.
        return *buffers[level];
    }
    const auto new_size = Common::NextPow2(request_size);
    static constexpr VkBufferUsageFlags flags =
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
    buffers[level] = device.GetLogical().CreateBuffer({
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = new_size,
        .usage = flags,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    });
    // The commit doubles as the "slot is initialized" marker checked above.
    buffer_commits[level] = std::make_unique<MemoryCommit>(
        memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
    return *buffers[level];
}
|
||||
|
||||
/// Copies the regions in `copies` from `src` to `dst` by bouncing them through a temporary buffer.
///
/// The recorded command sequence is:
///   1. transition `src` from GENERAL to TRANSFER_SRC_OPTIMAL and copy it into the buffer,
///   2. transition `src` back to GENERAL,
///   3. transition `dst` from GENERAL to TRANSFER_DST_OPTIMAL and copy the buffer into it,
///   4. transition `dst` back to GENERAL.
///
/// @param dst    Image receiving the data.
/// @param src    Image providing the data.
/// @param copies Regions to transfer; the source side of each entry is read, the destination
///               side is written.
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
                                           std::span<const VideoCommon::ImageCopy> copies) {
    std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
    std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
    const VkImageAspectFlags src_aspect_mask = src.AspectMask();
    const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();

    // NOTE(review): MakeBufferImageCopy gives every region bufferOffset 0, so with more than one
    // entry in `copies` the regions alias the start of the temporary buffer — confirm that
    // callers only rely on this with a single copy.
    std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) {
        return MakeBufferImageCopy(copy, true, src_aspect_mask);
    });
    std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) {
        return MakeBufferImageCopy(copy, false, dst_aspect_mask);
    });
    const u32 img_bpp = BytesPerBlock(src.info.format);
    size_t total_size = 0;
    for (const auto& copy : copies) {
        // Widen before multiplying so large extents cannot overflow the narrower operand type.
        total_size += static_cast<size_t>(copy.extent.width) * copy.extent.height *
                      copy.extent.depth * img_bpp;
    }
    const VkBuffer copy_buffer = GetTemporaryBuffer(total_size);
    const VkImage dst_image = dst.Handle();
    const VkImage src_image = src.Handle();
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask,
                      vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) {
        // Accumulate the subresource ranges touched on each image for the barriers below.
        RangedBarrierRange dst_range;
        RangedBarrierRange src_range;
        for (const VkBufferImageCopy& copy : vk_in_copies) {
            src_range.AddLayers(copy.imageSubresource);
        }
        for (const VkBufferImageCopy& copy : vk_out_copies) {
            dst_range.AddLayers(copy.imageSubresource);
        }
        static constexpr VkMemoryBarrier READ_BARRIER{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
        };
        static constexpr VkMemoryBarrier WRITE_BARRIER{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
        };
        // src: GENERAL -> TRANSFER_SRC_OPTIMAL before it is read into the buffer.
        const std::array pre_barriers{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
            },
        };
        // src: TRANSFER_SRC_OPTIMAL -> GENERAL once the image-to-buffer copy is done.
        const std::array middle_in_barrier{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = 0,
                .dstAccessMask = 0,
                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
            },
        };
        // dst: GENERAL -> TRANSFER_DST_OPTIMAL before it is written from the buffer.
        const std::array middle_out_barrier{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
            },
        };
        // dst: TRANSFER_DST_OPTIMAL -> GENERAL, making the new contents visible to later use.
        const std::array post_barriers{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
            },
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, {}, {}, pre_barriers);

        cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
                                 vk_in_copies);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, WRITE_BARRIER, nullptr, middle_in_barrier);

        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, READ_BARRIER, {}, middle_out_barrier);
        // The layout passed here must be the one the image is actually in: middle_out_barrier
        // has just moved dst to TRANSFER_DST_OPTIMAL and post_barriers transitions from it.
        cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                 vk_out_copies);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, {}, {}, post_barriers);
    });
}
|
||||
|
||||
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
|
@ -881,6 +1067,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
|
|||
return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::A8B8G8R8_UNORM:
|
||||
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
|
||||
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::R32_FLOAT:
|
||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||
return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
|
||||
|
@ -891,6 +1082,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
|
|||
return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::S8_UINT_D24_UNORM:
|
||||
return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift);
|
||||
break;
|
||||
case PixelFormat::D32_FLOAT:
|
||||
if (src_view.format == PixelFormat::R32_FLOAT) {
|
||||
return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
|
||||
|
@ -1386,6 +1580,14 @@ VkImageView ImageView::StencilView() {
|
|||
return *stencil_view;
|
||||
}
|
||||
|
||||
/// Returns the cached color view of this image view, creating it on first use as an
/// R8G8B8A8_UNORM view with the color aspect.
VkImageView ImageView::ColorView() {
    if (!color_view) {
        color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT);
    }
    return *color_view;
}
|
||||
|
||||
VkImageView ImageView::StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format) {
|
||||
if (image_format == Shader::ImageFormat::Typeless) {
|
||||
|
|
|
@ -61,6 +61,10 @@ public:
|
|||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
/// Returns true when either image is depth-stencil and the device lacks shader stencil export,
/// or when either image is D32_FLOAT_S8_UINT.
bool ShouldReinterpret(Image& dst, Image& src);
|
||||
|
||||
/// Copies the regions in `copies` from `src` to `dst` through a temporary buffer.
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
|
||||
|
||||
bool CanAccelerateImageUpload(Image&) const noexcept {
|
||||
|
@ -82,6 +86,8 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Returns a cached device-local scratch buffer for `needed_size` bytes, creating it on first use.
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
|
||||
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
MemoryAllocator& memory_allocator;
|
||||
|
@ -90,6 +96,10 @@ public:
|
|||
ASTCDecoderPass& astc_decoder_pass;
|
||||
RenderPassCache& render_pass_cache;
|
||||
const Settings::ResolutionScalingInfo& resolution;
|
||||
|
||||
// Number of slots in the temporary buffer cache: one per bit of size_t.
constexpr static size_t indexing_slots = 8 * sizeof(size_t);
|
||||
// Temporary buffers handed out by GetTemporaryBuffer, indexed by the level it computes.
std::array<vk::Buffer, indexing_slots> buffers{};
|
||||
// Memory commits backing `buffers`; a non-null entry marks the slot as already created.
std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
|
@ -174,6 +184,8 @@ public:
|
|||
|
||||
[[nodiscard]] VkImageView StencilView();
|
||||
|
||||
[[nodiscard]] VkImageView ColorView();
|
||||
|
||||
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format);
|
||||
|
||||
|
@ -214,6 +226,7 @@ private:
|
|||
std::unique_ptr<StorageViews> storage_views;
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
vk::ImageView color_view;
|
||||
VkImage image_handle = VK_NULL_HANDLE;
|
||||
VkImageView render_target = VK_NULL_HANDLE;
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
|
@ -316,7 +329,6 @@ struct TextureCacheParams {
|
|||
static constexpr bool FRAMEBUFFER_BLITS = false;
|
||||
static constexpr bool HAS_EMULATED_COPIES = false;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false;
|
||||
|
||||
using Runtime = Vulkan::TextureCacheRuntime;
|
||||
using Image = Vulkan::Image;
|
||||
|
|
|
@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept {
|
|||
const int now_complete = num_complete.load(std::memory_order::relaxed);
|
||||
const int now_building = num_building.load(std::memory_order::relaxed);
|
||||
if (now_complete == now_building) {
|
||||
const auto now = std::chrono::high_resolution_clock::now();
|
||||
const auto now = std::chrono::steady_clock::now();
|
||||
if (completed && num_complete == num_when_completed) {
|
||||
if (now - complete_time > TIME_TO_STOP_REPORTING) {
|
||||
report_base = now_complete;
|
||||
|
|
|
@ -28,6 +28,6 @@ private:
|
|||
|
||||
bool completed{};
|
||||
int num_when_completed{};
|
||||
std::chrono::high_resolution_clock::time_point complete_time;
|
||||
std::chrono::steady_clock::time_point complete_time;
|
||||
};
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -82,6 +82,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
|
|||
return PixelFormat::D32_FLOAT;
|
||||
case Tegra::DepthFormat::D16_UNORM:
|
||||
return PixelFormat::D16_UNORM;
|
||||
case Tegra::DepthFormat::S8_UINT:
|
||||
return PixelFormat::S8_UINT;
|
||||
case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
|
||||
return PixelFormat::D32_FLOAT_S8_UINT;
|
||||
default:
|
||||
|
@ -213,6 +215,11 @@ SurfaceType GetFormatType(PixelFormat pixel_format) {
|
|||
return SurfaceType::Depth;
|
||||
}
|
||||
|
||||
if (static_cast<std::size_t>(pixel_format) <
|
||||
static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) {
|
||||
return SurfaceType::Stencil;
|
||||
}
|
||||
|
||||
if (static_cast<std::size_t>(pixel_format) <
|
||||
static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
|
||||
return SurfaceType::DepthStencil;
|
||||
|
|
|
@ -110,8 +110,12 @@ enum class PixelFormat {
|
|||
|
||||
MaxDepthFormat,
|
||||
|
||||
// Stencil formats
|
||||
S8_UINT = MaxDepthFormat,
|
||||
MaxStencilFormat,
|
||||
|
||||
// DepthStencil formats
|
||||
D24_UNORM_S8_UINT = MaxDepthFormat,
|
||||
D24_UNORM_S8_UINT = MaxStencilFormat,
|
||||
S8_UINT_D24_UNORM,
|
||||
D32_FLOAT_S8_UINT,
|
||||
|
||||
|
@ -125,8 +129,9 @@ constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max
|
|||
enum class SurfaceType {
|
||||
ColorTexture = 0,
|
||||
Depth = 1,
|
||||
DepthStencil = 2,
|
||||
Invalid = 3,
|
||||
Stencil = 2,
|
||||
DepthStencil = 3,
|
||||
Invalid = 4,
|
||||
};
|
||||
|
||||
enum class SurfaceTarget {
|
||||
|
@ -229,6 +234,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
|
|||
1, // E5B9G9R9_FLOAT
|
||||
1, // D32_FLOAT
|
||||
1, // D16_UNORM
|
||||
1, // S8_UINT
|
||||
1, // D24_UNORM_S8_UINT
|
||||
1, // S8_UINT_D24_UNORM
|
||||
1, // D32_FLOAT_S8_UINT
|
||||
|
@ -328,6 +334,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
|
|||
1, // E5B9G9R9_FLOAT
|
||||
1, // D32_FLOAT
|
||||
1, // D16_UNORM
|
||||
1, // S8_UINT
|
||||
1, // D24_UNORM_S8_UINT
|
||||
1, // S8_UINT_D24_UNORM
|
||||
1, // D32_FLOAT_S8_UINT
|
||||
|
@ -427,6 +434,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
|
|||
32, // E5B9G9R9_FLOAT
|
||||
32, // D32_FLOAT
|
||||
16, // D16_UNORM
|
||||
8, // S8_UINT
|
||||
32, // D24_UNORM_S8_UINT
|
||||
32, // S8_UINT_D24_UNORM
|
||||
64, // D32_FLOAT_S8_UINT
|
||||
|
|
|
@ -139,6 +139,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
|
|||
return PixelFormat::D16_UNORM;
|
||||
case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
|
||||
return PixelFormat::S8_UINT_D24_UNORM;
|
||||
case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR):
|
||||
return PixelFormat::S8_UINT_D24_UNORM;
|
||||
case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
|
||||
return PixelFormat::S8_UINT_D24_UNORM;
|
||||
case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
|
||||
|
|
|
@ -194,6 +194,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
|
|||
return "D32_FLOAT";
|
||||
case PixelFormat::D16_UNORM:
|
||||
return "D16_UNORM";
|
||||
case PixelFormat::S8_UINT:
|
||||
return "S8_UINT";
|
||||
case PixelFormat::D24_UNORM_S8_UINT:
|
||||
return "D24_UNORM_S8_UINT";
|
||||
case PixelFormat::S8_UINT_D24_UNORM:
|
||||
|
|
|
@ -472,9 +472,10 @@ template <class P>
|
|||
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Config& copy) {
|
||||
const BlitImages images = GetBlitImages(dst, src);
|
||||
const BlitImages images = GetBlitImages(dst, src, copy);
|
||||
const ImageId dst_id = images.dst_id;
|
||||
const ImageId src_id = images.src_id;
|
||||
|
||||
PrepareImage(src_id, false, false);
|
||||
PrepareImage(dst_id, true, false);
|
||||
|
||||
|
@ -758,14 +759,18 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|||
return ImageId{};
|
||||
}
|
||||
}
|
||||
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
||||
const bool broken_views =
|
||||
runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews);
|
||||
const bool native_bgr = runtime.HasNativeBgr();
|
||||
ImageId image_id;
|
||||
const bool flexible_formats = True(options & RelaxedOptions::Format);
|
||||
ImageId image_id{};
|
||||
boost::container::small_vector<ImageId, 1> image_ids;
|
||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
|
||||
return false;
|
||||
}
|
||||
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
|
||||
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear)
|
||||
[[unlikely]] {
|
||||
const bool strict_size = False(options & RelaxedOptions::Size) &&
|
||||
True(existing_image.flags & ImageFlagBits::Strong);
|
||||
const ImageInfo& existing = existing_image.info;
|
||||
|
@ -774,17 +779,27 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|||
IsPitchLinearSameSize(existing, info, strict_size) &&
|
||||
IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
|
||||
image_id = existing_image_id;
|
||||
return true;
|
||||
image_ids.push_back(existing_image_id);
|
||||
return !flexible_formats && existing.format == info.format;
|
||||
}
|
||||
} else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
|
||||
native_bgr)) {
|
||||
image_id = existing_image_id;
|
||||
return true;
|
||||
image_ids.push_back(existing_image_id);
|
||||
return !flexible_formats && existing_image.info.format == info.format;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
|
||||
return image_id;
|
||||
if (image_ids.size() <= 1) [[likely]] {
|
||||
return image_id;
|
||||
}
|
||||
auto image_ids_compare = [this](ImageId a, ImageId b) {
|
||||
auto& image_a = slot_images[a];
|
||||
auto& image_b = slot_images[b];
|
||||
return image_a.modification_tick < image_b.modification_tick;
|
||||
};
|
||||
return *std::ranges::max_element(image_ids, image_ids_compare);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -1076,31 +1091,66 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||
|
||||
template <class P>
|
||||
typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
|
||||
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
|
||||
static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
|
||||
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Config& copy) {
|
||||
|
||||
static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples;
|
||||
const GPUVAddr dst_addr = dst.Address();
|
||||
const GPUVAddr src_addr = src.Address();
|
||||
ImageInfo dst_info(dst);
|
||||
ImageInfo src_info(src);
|
||||
const bool can_be_depth_blit =
|
||||
dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point;
|
||||
ImageId dst_id;
|
||||
ImageId src_id;
|
||||
RelaxedOptions try_options = FIND_OPTIONS;
|
||||
if (can_be_depth_blit) {
|
||||
try_options |= RelaxedOptions::Format;
|
||||
}
|
||||
do {
|
||||
has_deleted_images = false;
|
||||
dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
|
||||
src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
|
||||
const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
|
||||
src_id = FindImage(src_info, src_addr, try_options);
|
||||
dst_id = FindImage(dst_info, dst_addr, try_options);
|
||||
const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
|
||||
DeduceBlitImages(dst_info, src_info, dst_image, src_image);
|
||||
if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
|
||||
continue;
|
||||
if (src_image && src_image->info.num_samples > 1) {
|
||||
RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
|
||||
src_id = FindOrInsertImage(src_info, src_addr, find_options);
|
||||
dst_id = FindOrInsertImage(dst_info, dst_addr, find_options);
|
||||
if (has_deleted_images) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!dst_id) {
|
||||
dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
|
||||
if (can_be_depth_blit) {
|
||||
const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
|
||||
DeduceBlitImages(dst_info, src_info, dst_image, src_image);
|
||||
if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!src_id) {
|
||||
src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
|
||||
}
|
||||
if (!dst_id) {
|
||||
dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
|
||||
}
|
||||
} while (has_deleted_images);
|
||||
const ImageBase& src_image = slot_images[src_id];
|
||||
const ImageBase& dst_image = slot_images[dst_id];
|
||||
const bool native_bgr = runtime.HasNativeBgr();
|
||||
if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) ||
|
||||
GetFormatType(src_info.format) != GetFormatType(src_image.info.format) ||
|
||||
!VideoCore::Surface::IsViewCompatible(dst_info.format, dst_image.info.format, false,
|
||||
native_bgr) ||
|
||||
!VideoCore::Surface::IsViewCompatible(src_info.format, src_image.info.format, false,
|
||||
native_bgr)) {
|
||||
// Make sure the images match the expected format.
|
||||
do {
|
||||
has_deleted_images = false;
|
||||
src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{});
|
||||
dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
|
||||
} while (has_deleted_images);
|
||||
}
|
||||
return BlitImages{
|
||||
.dst_id = dst_id,
|
||||
.src_id = src_id,
|
||||
|
@ -1157,7 +1207,14 @@ template <class P>
|
|||
ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||
bool is_clear) {
|
||||
const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
|
||||
const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
|
||||
ImageId image_id{};
|
||||
bool delete_state = has_deleted_images;
|
||||
do {
|
||||
has_deleted_images = false;
|
||||
image_id = FindOrInsertImage(info, gpu_addr, options);
|
||||
delete_state |= has_deleted_images;
|
||||
} while (has_deleted_images);
|
||||
has_deleted_images = delete_state;
|
||||
if (!image_id) {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
|
@ -1759,8 +1816,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
|
|||
}
|
||||
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
|
||||
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
|
||||
if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) {
|
||||
return runtime.ConvertImage(dst, src, copies);
|
||||
if (runtime.ShouldReinterpret(dst, src)) {
|
||||
return runtime.ReinterpretImage(dst, src, copies);
|
||||
}
|
||||
for (const ImageCopy& copy : copies) {
|
||||
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
|
||||
|
@ -1780,7 +1837,13 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
|
|||
const SubresourceExtent src_extent{.levels = 1, .layers = 1};
|
||||
const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
|
||||
const SubresourceRange src_range{.base = src_base, .extent = src_extent};
|
||||
const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
|
||||
PixelFormat dst_format = dst.info.format;
|
||||
if (GetFormatType(src.info.format) == SurfaceType::DepthStencil &&
|
||||
GetFormatType(dst_format) == SurfaceType::ColorTexture &&
|
||||
BytesPerBlock(dst_format) == 4) {
|
||||
dst_format = PixelFormat::A8B8G8R8_UNORM;
|
||||
}
|
||||
const ImageViewInfo dst_view_info(ImageViewType::e2D, dst_format, dst_range);
|
||||
const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
|
||||
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
|
||||
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
|
||||
|
|
|
@ -59,8 +59,6 @@ class TextureCache {
|
|||
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
|
||||
/// True when the API can provide info about the memory of the device.
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||
/// True when the API provides utilities for pixel format conversions.
|
||||
static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS;
|
||||
|
||||
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
|
||||
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
|
||||
|
@ -254,7 +252,8 @@ private:
|
|||
|
||||
/// Return a blit image pair from the given guest blit parameters
|
||||
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Surface& src);
|
||||
const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Config& copy);
|
||||
|
||||
/// Find or create a sampler from a guest descriptor sampler
|
||||
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
|
||||
|
|
|
@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 {
|
|||
Size = 1 << 0,
|
||||
Format = 1 << 1,
|
||||
Samples = 1 << 2,
|
||||
ForceBrokenViews = 1 << 3,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
|
||||
|
||||
|
|
|
@ -1151,6 +1151,7 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
|
|||
|
||||
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
|
||||
const ImageBase* src) {
|
||||
const auto original_dst_format = dst_info.format;
|
||||
if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
|
||||
src_info.format = src->info.format;
|
||||
}
|
||||
|
@ -1161,7 +1162,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase*
|
|||
dst_info.format = src->info.format;
|
||||
}
|
||||
if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
|
||||
src_info.format = dst->info.format;
|
||||
if (src) {
|
||||
if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) {
|
||||
dst_info.format = original_dst_format;
|
||||
}
|
||||
} else {
|
||||
src_info.format = dst->info.format;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,10 +55,4 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
|
|||
}
|
||||
}
|
||||
|
||||
float GetResolutionScaleFactor(const RendererBase& renderer) {
|
||||
return Settings::values.resolution_info.active
|
||||
? Settings::values.resolution_info.up_factor
|
||||
: renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio();
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -25,6 +25,4 @@ class RendererBase;
|
|||
/// Creates an emulated GPU instance using the given system context.
|
||||
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
|
||||
|
||||
/// Returns the resolution scale factor for the given renderer: the configured up-scaling factor
/// while resolution scaling is active, otherwise the framebuffer layout's scaling ratio.
float GetResolutionScaleFactor(const RendererBase& renderer);
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -21,6 +21,13 @@
|
|||
namespace Vulkan {
|
||||
namespace {
|
||||
namespace Alternatives {
|
||||
/// Alternative formats for VK_FORMAT_S8_UINT; GetFormatAlternatives returns this list for that
/// format.
/// NOTE(review): the trailing VK_FORMAT_UNDEFINED looks like a list terminator, since the list is
/// handed out as a raw pointer via .data() - confirm against the consumer.
constexpr std::array STENCIL8_UINT{
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
|
@ -74,6 +81,8 @@ void SetNext(void**& next, T& data) {
|
|||
|
||||
constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
|
||||
switch (format) {
|
||||
case VK_FORMAT_S8_UINT:
|
||||
return Alternatives::STENCIL8_UINT.data();
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
|
||||
case VK_FORMAT_D16_UNORM_S8_UINT:
|
||||
|
@ -121,6 +130,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
|
|||
VK_FORMAT_R16G16_UNORM,
|
||||
VK_FORMAT_R16G16_SNORM,
|
||||
VK_FORMAT_R16G16_SFLOAT,
|
||||
VK_FORMAT_R16G16_UINT,
|
||||
VK_FORMAT_R16G16_SINT,
|
||||
VK_FORMAT_R16_UNORM,
|
||||
VK_FORMAT_R16_SNORM,
|
||||
|
@ -145,6 +155,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
|
|||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||
VK_FORMAT_D32_SFLOAT,
|
||||
VK_FORMAT_D16_UNORM,
|
||||
VK_FORMAT_S8_UINT,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue