video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage.The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
This commit is contained in:
parent
9106ac1e6b
commit
9764c13d6d
152 changed files with 10609 additions and 8351 deletions
|
@ -18,6 +18,7 @@
|
|||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
|
|||
return params;
|
||||
}
|
||||
|
||||
static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
|
||||
static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
|
||||
u32 blockHeight) {
|
||||
// Don't actually care about the void extent, just read the bits...
|
||||
for (s32 i = 0; i < 4; ++i) {
|
||||
|
@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
|
|||
}
|
||||
}
|
||||
|
||||
static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
|
||||
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
|
||||
for (u32 j = 0; j < blockHeight; j++) {
|
||||
for (u32 i = 0; i < blockWidth; i++) {
|
||||
outBuf[j * blockWidth + i] = 0xFFFF00FF;
|
||||
|
@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
|
|||
#undef READ_INT_VALUES
|
||||
}
|
||||
|
||||
static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
|
||||
u32* outBuf) {
|
||||
InputBitStream strm(inBuf);
|
||||
static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
|
||||
const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
|
||||
InputBitStream strm(inBuf.data());
|
||||
TexelWeightParams weightParams = DecodeBlockInfo(strm);
|
||||
|
||||
// Was there an error?
|
||||
|
@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
|
|||
}
|
||||
|
||||
// Read the texel weight data..
|
||||
u8 texelWeightData[16];
|
||||
memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
|
||||
std::array<u8, 16> texelWeightData;
|
||||
std::ranges::copy(inBuf, texelWeightData.begin());
|
||||
|
||||
// Reverse everything
|
||||
for (u32 i = 0; i < 8; i++) {
|
||||
|
@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
|
|||
|
||||
// Make sure that higher non-texel bits are set to zero
|
||||
const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
|
||||
texelWeightData[clearByteStart - 1] =
|
||||
texelWeightData[clearByteStart - 1] &
|
||||
static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
|
||||
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
|
||||
if (clearByteStart > 0) {
|
||||
texelWeightData[clearByteStart - 1] &=
|
||||
static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
|
||||
}
|
||||
std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
|
||||
|
||||
IntegerEncodedVector texelWeightValues;
|
||||
|
||||
InputBitStream weightStream(texelWeightData);
|
||||
InputBitStream weightStream(texelWeightData.data());
|
||||
|
||||
DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
|
||||
weightParams.GetNumWeightValues());
|
||||
|
@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
|
|||
|
||||
namespace Tegra::Texture::ASTC {
|
||||
|
||||
std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
|
||||
u32 block_height) {
|
||||
u32 blockIdx = 0;
|
||||
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
||||
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
|
||||
u32 block_index = 0;
|
||||
std::size_t depth_offset = 0;
|
||||
std::vector<u8> outData(height * width * depth * 4);
|
||||
for (u32 k = 0; k < depth; k++) {
|
||||
for (u32 j = 0; j < height; j += block_height) {
|
||||
for (u32 i = 0; i < width; i += block_width) {
|
||||
|
||||
const u8* blockPtr = data + blockIdx * 16;
|
||||
for (u32 z = 0; z < depth; z++) {
|
||||
for (u32 y = 0; y < height; y += block_height) {
|
||||
for (u32 x = 0; x < width; x += block_width) {
|
||||
const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
|
||||
|
||||
// Blocks can be at most 12x12
|
||||
u32 uncompData[144];
|
||||
std::array<u32, 12 * 12> uncompData;
|
||||
ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
|
||||
|
||||
u32 decompWidth = std::min(block_width, width - i);
|
||||
u32 decompHeight = std::min(block_height, height - j);
|
||||
u32 decompWidth = std::min(block_width, width - x);
|
||||
u32 decompHeight = std::min(block_height, height - y);
|
||||
|
||||
u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
|
||||
const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
|
||||
for (u32 jj = 0; jj < decompHeight; jj++) {
|
||||
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
|
||||
std::memcpy(outRow.data() + jj * width * 4,
|
||||
uncompData.data() + jj * block_width, decompWidth * 4);
|
||||
}
|
||||
|
||||
blockIdx++;
|
||||
++block_index;
|
||||
}
|
||||
}
|
||||
depth_offset += height * width * 4;
|
||||
}
|
||||
|
||||
return outData;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Texture::ASTC
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue