Merge pull request #5927 from ameerj/astc-compute
video_core: Accelerate ASTC texture decoding using compute shaders
This commit is contained in:
commit
5ee669466f
22 changed files with 2027 additions and 1770 deletions
|
@ -307,7 +307,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
|
|||
|
||||
[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
|
||||
const VideoCommon::ImageInfo& info) {
|
||||
// Disable accelerated uploads for now as they don't implement swizzled uploads
|
||||
return !runtime.HasNativeASTC() && IsPixelFormatASTC(info.format);
|
||||
// Disable other accelerated uploads for now as they don't implement swizzled uploads
|
||||
return false;
|
||||
switch (info.type) {
|
||||
case ImageType::e2D:
|
||||
|
@ -569,7 +570,11 @@ void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferM
|
|||
std::span<const SwizzleParameters> swizzles) {
|
||||
switch (image.info.type) {
|
||||
case ImageType::e2D:
|
||||
return util_shaders.BlockLinearUpload2D(image, map, swizzles);
|
||||
if (IsPixelFormatASTC(image.info.format)) {
|
||||
return util_shaders.ASTCDecode(image, map, swizzles);
|
||||
} else {
|
||||
return util_shaders.BlockLinearUpload2D(image, map, swizzles);
|
||||
}
|
||||
case ImageType::e3D:
|
||||
return util_shaders.BlockLinearUpload3D(image, map, swizzles);
|
||||
case ImageType::Linear:
|
||||
|
@ -599,6 +604,10 @@ FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal
|
|||
}
|
||||
}
|
||||
|
||||
bool TextureCacheRuntime::HasNativeASTC() const noexcept {
|
||||
return device.HasASTC();
|
||||
}
|
||||
|
||||
TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
|
||||
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
|
||||
|
||||
|
|
|
@ -95,6 +95,8 @@ public:
|
|||
return has_broken_texture_view_formats;
|
||||
}
|
||||
|
||||
bool HasNativeASTC() const noexcept;
|
||||
|
||||
private:
|
||||
struct StagingBuffers {
|
||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bit>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
|
||||
|
@ -11,6 +10,7 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/astc_decoder_comp.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||
|
@ -20,16 +20,18 @@
|
|||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/textures/astc.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using namespace HostShaders;
|
||||
using namespace Tegra::Texture::ASTC;
|
||||
|
||||
using VideoCommon::Extent2D;
|
||||
using VideoCommon::Extent3D;
|
||||
using VideoCommon::ImageCopy;
|
||||
using VideoCommon::ImageType;
|
||||
|
@ -57,7 +59,7 @@ size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
|
|||
} // Anonymous namespace
|
||||
|
||||
UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||
: program_manager{program_manager_},
|
||||
: program_manager{program_manager_}, astc_decoder_program(MakeProgram(ASTC_DECODER_COMP)),
|
||||
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
|
||||
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
||||
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
||||
|
@ -65,11 +67,79 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
|||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||
swizzle_table_buffer.Create();
|
||||
astc_buffer.Create();
|
||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0);
|
||||
}
|
||||
|
||||
UtilShaders::~UtilShaders() = default;
|
||||
|
||||
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_ENC_BUFFER = 2;
|
||||
|
||||
static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
|
||||
static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
|
||||
static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
|
||||
static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
|
||||
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
const Extent2D tile_size{
|
||||
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
|
||||
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
|
||||
};
|
||||
program_manager.BindHostCompute(astc_decoder_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle,
|
||||
offsetof(AstcBufferData, encoding_values),
|
||||
sizeof(AstcBufferData::encoding_values));
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle,
|
||||
offsetof(AstcBufferData, replicate_6_to_8),
|
||||
sizeof(AstcBufferData::replicate_6_to_8));
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle,
|
||||
offsetof(AstcBufferData, replicate_7_to_8),
|
||||
sizeof(AstcBufferData::replicate_7_to_8));
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
|
||||
offsetof(AstcBufferData, replicate_8_to_8),
|
||||
sizeof(AstcBufferData::replicate_8_to_8));
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
|
||||
offsetof(AstcBufferData, replicate_byte_to_16),
|
||||
sizeof(AstcBufferData::replicate_byte_to_16));
|
||||
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
glUniform2ui(1, tile_size.width, tile_size.height);
|
||||
// Ensure buffer data is valid before dispatching
|
||||
glFlush();
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
|
||||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
|
||||
|
||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
||||
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
|
||||
|
||||
glUniform1ui(2, params.bytes_per_block_log2);
|
||||
glUniform1ui(3, params.layer_stride);
|
||||
glUniform1ui(4, params.block_size);
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, GL_RGBA8);
|
||||
// ASTC texture data
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
|
|
|
@ -40,6 +40,9 @@ public:
|
|||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
~UtilShaders();
|
||||
|
||||
void ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
|
@ -59,7 +62,9 @@ private:
|
|||
ProgramManager& program_manager;
|
||||
|
||||
OGLBuffer swizzle_table_buffer;
|
||||
OGLBuffer astc_buffer;
|
||||
|
||||
OGLProgram astc_decoder_program;
|
||||
OGLProgram block_linear_unswizzle_2d_program;
|
||||
OGLProgram block_linear_unswizzle_3d_program;
|
||||
OGLProgram pitch_unswizzle_program;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue