mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-05 02:03:16 +00:00
texture_cache: detiler: added support for block coded 128bit images
This commit is contained in:
parent
1a66fa098f
commit
fa73812f32
8 changed files with 115 additions and 35 deletions
|
@ -3,7 +3,8 @@
|
|||
|
||||
# Detile compute shader sources (.comp), one entry per shader file.
# NOTE(review): how SHADER_FILES is consumed is outside this view -- confirm
# that every file listed here exists next to this CMakeLists.txt.
set(SHADER_FILES
|
||||
detile_m8x1.comp
|
||||
detile_m8x4.comp
|
||||
detile_m32x1.comp
|
||||
detile_m32x4.comp
|
||||
)
|
||||
|
||||
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||
|
|
|
@ -8,14 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
|||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(rgba8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
layout(r32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint pitch;
|
||||
} info;
|
||||
|
||||
// Inverse morton LUT, small enough to fit into K$
|
||||
uint lut_8x4[16] = {
|
||||
uint rmort[16] = {
|
||||
0x11011000, 0x31213020,
|
||||
0x13031202, 0x33233222,
|
||||
0x51415040, 0x71617060,
|
||||
|
@ -27,17 +27,17 @@ uint lut_8x4[16] = {
|
|||
0x57475646, 0x77677666,
|
||||
};
|
||||
|
||||
#define MICRO_TILE_DIM 8
|
||||
#define TEXELS_PER_ELEMENT 1
|
||||
#define MICRO_TILE_DIM (8)
|
||||
#define TEXELS_PER_ELEMENT (1)
|
||||
|
||||
void main() {
|
||||
uint src_tx = in_data[gl_GlobalInvocationID.x];
|
||||
|
||||
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||
uint packed_pos = lut_8x4[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||
|
||||
uint p0 = in_data[gl_GlobalInvocationID.x];
|
||||
|
||||
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
||||
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
||||
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
||||
|
@ -46,12 +46,5 @@ void main() {
|
|||
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
||||
|
||||
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
||||
|
||||
uvec4 dst_tx = uvec4(
|
||||
bitfieldExtract(src_tx, 0, 8),
|
||||
bitfieldExtract(src_tx, 8, 8),
|
||||
bitfieldExtract(src_tx, 16, 8),
|
||||
bitfieldExtract(src_tx, 24, 8)
|
||||
);
|
||||
imageStore(output_img, img_pos, dst_tx);
|
||||
imageStore(output_img, img_pos, uvec4(p0, 0, 0, 0));
|
||||
}
|
50
src/video_core/host_shaders/detile_m32x4.comp
Normal file
50
src/video_core/host_shaders/detile_m32x4.comp
Normal file
|
@ -0,0 +1,50 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#version 450
|
||||
|
||||
// One workgroup is 64 invocations along x (y and z are 1).
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
// Input buffer, addressed by main() as a flat array of 32-bit words.
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
// Destination image: write-only, four 32-bit unsigned channels per texel.
layout(rgba32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
|
||||
// Push constant carrying the image pitch.
// NOTE(review): the unit of pitch is not visible here -- confirm against the caller.
layout(push_constant) uniform image_info {
|
||||
uint pitch;
|
||||
} info;
|
||||
|
||||
// Inverse morton LUT, small enough to fit into K$
// Each 32-bit entry packs four positions, one per byte, lowest byte first.
// Within a byte, bits 4-7 hold the column and bits 0-3 hold the row, which is
// how main() decodes it. Entry i, byte j belongs to local invocation 4*i + j,
// so the 16 entries cover all 64 invocations of a workgroup.
|
||||
uint rmort[16] = {
|
||||
0x11011000, 0x31213020,
|
||||
0x13031202, 0x33233222,
|
||||
0x51415040, 0x71617060,
|
||||
0x53435242, 0x73637262,
|
||||
|
||||
0x15051404, 0x35253424,
|
||||
0x17071606, 0x37273626,
|
||||
0x55455444, 0x75657464,
|
||||
0x57475646, 0x77677666,
|
||||
|
||||
};
|
||||
|
||||
#define MICRO_TILE_DIM (8)
|
||||
|
||||
// Detiles one 128-bit element per invocation: reads four consecutive 32-bit
// words from in_data and stores them as a single rgba32ui texel at the
// position this invocation occupies inside its 8x8 micro tile.
void main() {
    // Each rmort entry packs four (col, row) bytes; select this invocation's
    // byte and split it into the column (high nibble) and row (low nibble).
    uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
    uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
    uint col = bitfieldExtract(packed_pos, 4, 4);
    uint row = bitfieldExtract(packed_pos, 0, 4);

    // One element is 128 bits, i.e. four 32-bit words of the input buffer.
    uint block_ofs = 4 * gl_GlobalInvocationID.x;
    uint p0 = in_data[block_ofs + 0];
    uint p1 = in_data[block_ofs + 1];
    uint p2 = in_data[block_ofs + 2];
    uint p3 = in_data[block_ofs + 3];

    // Micro tiles per row: pitch / MICRO_TILE_DIM (>> 3), then / 4 (>> 2).
    // NOTE(review): the extra / 4 presumably converts a texel pitch into
    // 4-texel-wide block-coded elements -- confirm against the caller.
    // Clamped to 1 because integer % and / by zero are undefined in GLSL,
    // which the unclamped value would trigger for any pitch below 32.
    uint tiles_per_pitch = max((info.pitch >> 3) >> 2, 1u);

    // Origin of this workgroup's micro tile in the destination image; one
    // workgroup handles one tile, tiles are laid out row by row.
    ivec2 img_pos = MICRO_TILE_DIM * ivec2(
        gl_WorkGroupID.x % tiles_per_pitch,
        gl_WorkGroupID.x / tiles_per_pitch
    );
    imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, p2, p3));
}
|
Loading…
Add table
Add a link
Reference in a new issue