Surface management rework (1/3) (#307)

* amdgpu: proper CB and DB sizes calculation; minor refactoring

* texture_cache: separate file for image_info

* texture_cache: image guest address moved into image info

* texture_cache: surface size calculation

* shader_recompiler: fixed sin/cos

Thanks to red_pring and gandalfthewhite0173

* initial preparations for subresources upload

* review comments
This commit is contained in:
psucien 2024-07-20 11:51:21 +02:00 committed by GitHub
parent 2b52a17845
commit 64459f1a76
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 467 additions and 233 deletions

View file

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -14,25 +13,8 @@
namespace VideoCore {
using namespace Vulkan;
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
using Libraries::VideoOut::TilingMode;
static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
switch (format) {
case VideoOutFormat::A8R8G8B8Srgb:
return vk::Format::eB8G8R8A8Srgb;
case VideoOutFormat::A8B8G8R8Srgb:
return vk::Format::eR8G8B8A8Srgb;
case VideoOutFormat::A2R10G10B10:
case VideoOutFormat::A2R10G10B10Srgb:
return vk::Format::eA2R10G10B10UnormPack32;
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
bool ImageInfo::IsBlockCoded() const {
switch (pixel_format) {
case vk::Format::eBc1RgbaSrgbBlock:
@ -101,93 +83,6 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
return usage;
}
static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
switch (type) {
case AmdGpu::ImageType::Color1D:
case AmdGpu::ImageType::Color1DArray:
return vk::ImageType::e1D;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Cube:
case AmdGpu::ImageType::Color2DArray:
return vk::ImageType::e2D;
case AmdGpu::ImageType::Color3D:
return vk::ImageType::e3D;
default:
UNREACHABLE();
}
}
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
const auto& attrib = group.attrib;
is_tiled = attrib.tiling_mode == TilingMode::Tile;
tiling_mode =
is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear;
pixel_format = ConvertPixelFormat(attrib.pixel_format);
type = vk::ImageType::e2D;
size.width = attrib.width;
size.height = attrib.height;
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127);
const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
ASSERT(is_32bpp);
if (!is_tiled) {
guest_size_bytes = pitch * size.height * 4;
return;
}
if (Config::isNeoMode()) {
guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4;
} else {
guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
}
usage.vo_buffer = true;
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
is_tiled = buffer.IsTiled();
tiling_mode = buffer.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
num_samples = 1 << buffer.attrib.num_fragments_log2;
type = vk::ImageType::e2D;
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
size.depth = 1;
pitch = size.width;
guest_size_bytes = buffer.GetSizeAligned();
meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
usage.render_target = true;
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
is_tiled = false;
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = vk::ImageType::e2D;
num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
size.depth = 1;
pitch = size.width;
guest_size_bytes = buffer.GetSizeAligned();
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
usage.depth_target = true;
}
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
is_tiled = image.IsTiled();
tiling_mode = image.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
type = ConvertImageType(image.GetType());
size.width = image.width + 1;
size.height = image.height + 1;
size.depth = 1;
pitch = image.Pitch();
resources.levels = image.NumLevels();
resources.layers = image.NumLayers();
guest_size_bytes = image.GetSizeAligned();
usage.texture = true;
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}
@ -217,9 +112,9 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
}
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const ImageInfo& info_, VAddr cpu_addr)
const ImageInfo& info_)
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
ASSERT(info.pixel_format != vk::Format::eUndefined);
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |

View file

@ -9,6 +9,7 @@
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
@ -34,47 +35,6 @@ enum ImageFlagBits : u32 {
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo {
ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
bool IsTiled() const {
return tiling_mode != AmdGpu::TilingMode::Display_Linear;
}
bool IsBlockCoded() const;
bool IsPacked() const;
bool IsDepthStencil() const;
struct {
VAddr cmask_addr;
VAddr fmask_addr;
VAddr htile_addr;
} meta_info{};
struct {
u32 texture : 1;
u32 storage : 1;
u32 render_target : 1;
u32 depth_target : 1;
u32 vo_buffer : 1;
} usage{}; // Usage data tracked during image lifetime
bool is_tiled = false;
vk::Format pixel_format = vk::Format::eUndefined;
vk::ImageType type = vk::ImageType::e1D;
SubresourceExtent resources;
Extent3D size{1, 1, 1};
u32 num_samples = 1;
u32 pitch = 0;
u32 guest_size_bytes = 0;
AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
};
struct UniqueImage {
explicit UniqueImage(vk::Device device, VmaAllocator allocator);
~UniqueImage();
@ -109,8 +69,7 @@ private:
constexpr Common::SlotId NULL_IMAGE_ID{0};
struct Image {
explicit Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageInfo& info, VAddr cpu_addr);
Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageInfo& info);
~Image();
Image(const Image&) = delete;

View file

@ -0,0 +1,268 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/texture_cache/image_info.h"
namespace VideoCore {
using namespace Vulkan;
using Libraries::VideoOut::TilingMode;
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
/// Translates a VideoOut pixel format into the Vulkan format used for the image.
/// Formats without a mapping are reported through UNREACHABLE_MSG.
static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
    if (format == VideoOutFormat::A8R8G8B8Srgb) {
        return vk::Format::eB8G8R8A8Srgb;
    }
    if (format == VideoOutFormat::A8B8G8R8Srgb) {
        return vk::Format::eR8G8B8A8Srgb;
    }
    // Both 10-bit variants (with and without the sRGB tag) share one Vulkan format.
    if (format == VideoOutFormat::A2R10G10B10 || format == VideoOutFormat::A2R10G10B10Srgb) {
        return vk::Format::eA2R10G10B10UnormPack32;
    }
    UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
    return {};
}
/// Maps a guest image descriptor type onto the dimensionality of the Vulkan image backing it.
/// Array and cube types use the image type of their individual layers.
static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
    const bool is_1d =
        type == AmdGpu::ImageType::Color1D || type == AmdGpu::ImageType::Color1DArray;
    if (is_1d) {
        return vk::ImageType::e1D;
    }
    const bool is_2d = type == AmdGpu::ImageType::Color2D || type == AmdGpu::ImageType::Cube ||
                       type == AmdGpu::ImageType::Color2DArray;
    if (is_2d) {
        return vk::ImageType::e2D;
    }
    if (type == AmdGpu::ImageType::Color3D) {
        return vk::ImageType::e3D;
    }
    // Any other descriptor type has no mapping.
    UNREACHABLE();
}
// clang-format off
// The table of macro tiles parameters for given tiling index (row) and bpp (column)
//
// Layout: four entries per row, one row per tiling index (0x00..0x1A, see the trailing comment
// on each row). GetMacroTileExtents() addresses it as `tiling_idx * 4 + column`, where the columns
// correspond to 8, 16, 32 and 64 bits per element in that order.
// Each entry is consumed by ImageSizeMacroTiled() as {pitch alignment, height alignment}.
// A {0, 0} entry marks a combination that has no extents here; ImageSizeMacroTiled() asserts on it.
static constexpr std::array macro_tile_extents{
    std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
    std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 07
    std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
    std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0B
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0C
    std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
    std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 10
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 11
    std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 12
    std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
    std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
    std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
    std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
    std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
    std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
    std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
    std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
// clang-format on
// {pitch alignment, height alignment} applied to micro tiled surfaces by ImageSizeMicroTiled().
static constexpr std::pair micro_tile_extent{8u, 8u};
// Divided by the element size in ImageSizeLinearAligned() to derive the slice alignment of
// linear surfaces. NOTE(review): presumably the hardware pipe interleave size in bytes - confirm.
static constexpr auto hw_pipe_interleave = 256u;
/// Looks up the {pitch alignment, height alignment} pair of a macro tiled surface.
/// @param tiling_idx  Selects the row of macro_tile_extents; must refer to an existing row.
/// @param bpp         Bits per element; selects the column and must be 8, 16, 32 or 64.
/// @param num_samples Sample count; only single-sampled surfaces are handled.
/// @return The table entry; {0, 0} for combinations the table does not describe.
static constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) {
    ASSERT(num_samples == 1);
    // The column is derived from the highest set bit of bpp, so any other value would silently
    // select a wrong column or index past the end of the table.
    ASSERT(bpp == 8 || bpp == 16 || bpp == 32 || bpp == 64);
    constexpr u32 num_columns = 4;
    // The tiling index comes from guest data; reject rows the table does not contain instead of
    // reading out of bounds.
    ASSERT(tiling_idx < macro_tile_extents.size() / num_columns);
    const auto row = tiling_idx * num_columns;
    const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64
    return macro_tile_extents[row + column];
}
/// Computes the size in bytes of one slice of a linear surface.
/// The pitch is first aligned to max(8, 64 / bytes_per_element) elements and then grown in steps
/// of that alignment until the slice element count is a multiple of
/// max(64, hw_pipe_interleave / bytes_per_element).
/// @param pitch       Row pitch in elements.
/// @param height      Number of rows; used as is, without alignment.
/// @param bpp         Bits per element.
/// @param num_samples Samples per element.
/// @return Slice size in bytes.
static constexpr size_t ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, u32 num_samples) {
    const auto bytes_per_element = (bpp + 7) / 8;
    const auto pitch_align = std::max(8u, 64u / bytes_per_element);
    auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
    const auto height_aligned = height;
    // The interleave has to be divided by the element size in bytes. The unparenthesised form
    // `hw_pipe_interleave / (bpp + 7) / 8` divides by (bpp + 7) first and therefore always ends
    // up at the lower bound of 64, which under-aligns 8 and 16 bpp surfaces.
    const auto slice_align = std::max(64u, hw_pipe_interleave / bytes_per_element);
    size_t log_sz = 1;
    while (log_sz % slice_align) {
        // Widen before multiplying so the element count cannot wrap around in 32 bits.
        log_sz = static_cast<size_t>(pitch_aligned) * height_aligned * num_samples;
        pitch_aligned += pitch_align;
    }
    return (log_sz * bpp + 7) / 8;
}
/// Computes the size in bytes of a micro tiled surface.
/// Pitch and height are rounded up to the micro tile extent; the pitch is then widened in steps
/// of 8 elements until the resulting byte size is a multiple of 256.
static constexpr size_t ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples) {
    const u32 tile_w = micro_tile_extent.first;
    const u32 tile_h = micro_tile_extent.second;
    const u32 padded_h = (height + tile_h - 1) & ~(tile_h - 1);
    u32 padded_w = (pitch + tile_w - 1) & ~(tile_w - 1);
    size_t byte_size = 0;
    do {
        byte_size = (padded_w * padded_h * bpp * num_samples + 7) / 8;
        padded_w += 8;
    } while (byte_size % 256);
    return byte_size;
}
/// Computes the size in bytes of a macro tiled surface: pitch and height are rounded up to the
/// macro tile extents of the given tiling index / bpp, then converted from bits to bytes.
static constexpr size_t ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples,
                                            u32 tiling_idx) {
    const std::pair<u32, u32> extents = GetMacroTileExtents(tiling_idx, bpp, num_samples);
    const u32 pitch_align = extents.first;
    const u32 height_align = extents.second;
    // Zero extents mark table entries without data for this combination.
    ASSERT(pitch_align != 0 && height_align != 0);
    const u32 padded_w = (pitch + pitch_align - 1) & ~(pitch_align - 1);
    const u32 padded_h = (height + height_align - 1) & ~(height_align - 1);
    const u32 total_bits = padded_w * padded_h * bpp * num_samples;
    return (total_bits + 7) / 8;
}
/// Describes a video-out (display) buffer located at cpu_address.
/// Only 32 bpp formats are accepted; the surface has a single mip covering the whole guest range.
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
                     VAddr cpu_address) noexcept {
    const auto& attrib = group.attrib;

    is_tiled = attrib.tiling_mode == TilingMode::Tile;
    if (is_tiled) {
        tiling_mode = AmdGpu::TilingMode::Display_MacroTiled;
    } else {
        tiling_mode = AmdGpu::TilingMode::Display_Linear;
    }
    pixel_format = ConvertPixelFormat(attrib.pixel_format);
    type = vk::ImageType::e2D;
    size.width = attrib.width;
    size.height = attrib.height;
    usage.vo_buffer = true;

    // Everything that is not linear gets its pitch padded to 128 pixels.
    if (attrib.tiling_mode == TilingMode::Linear) {
        pitch = size.width;
    } else {
        pitch = (size.width + 127) & (~127);
    }

    const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
    ASSERT(is_32bpp);

    guest_address = cpu_address;

    // Tiled buffers pad the row count as well: to 128 rows in neo mode, to 64 rows otherwise.
    u32 rows = size.height;
    if (is_tiled) {
        const u32 row_align = Config::isNeoMode() ? 128u : 64u;
        rows = (rows + row_align - 1) & ~(row_align - 1);
    }
    guest_size_bytes = pitch * rows * 4;
    mips_layout.emplace_back(0, guest_size_bytes);
}
/// Describes a color render target. When `hint` is valid it overrides the image extent;
/// the pitch and the guest memory range always come from the color buffer registers.
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
                     const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
    // Guest memory range: the total size is the slice size times the number of slices.
    const auto slice_bytes = buffer.GetColorSliceSize();
    const auto slice_count = buffer.NumSlices();
    guest_address = buffer.Address();
    guest_size_bytes = slice_bytes * slice_count;
    mips_layout.emplace_back(0, slice_bytes);
    resources.layers = slice_count;

    // Format and tiling.
    is_tiled = buffer.IsTiled();
    tiling_mode = buffer.GetTilingMode();
    pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
    num_samples = 1 << buffer.attrib.num_fragments_log2;
    type = vk::ImageType::e2D;

    // Extent.
    if (hint.Valid()) {
        size.width = hint.width;
        size.height = hint.height;
    } else {
        size.width = buffer.Pitch();
        size.height = buffer.Height();
    }
    size.depth = 1;
    pitch = buffer.Pitch();

    // Metadata surfaces are only recorded when the corresponding feature is enabled;
    // the addresses stay at their zero default otherwise.
    if (buffer.info.fast_clear) {
        meta_info.cmask_addr = buffer.CmaskAddress();
    }
    if (buffer.info.compression) {
        meta_info.fmask_addr = buffer.FmaskAddress();
    }
    usage.render_target = true;
}
/// Describes a depth target.
/// @param buffer        Depth buffer registers providing format, extent and guest address.
/// @param num_slices    Number of slices; becomes the layer count and scales the guest size.
/// @param htile_address Address of the HTILE surface; recorded only when tile_surface_en is set.
/// @param hint          When valid, overrides the image extent taken from the registers.
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
                     VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
    is_tiled = false;
    pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
    type = vk::ImageType::e2D;
    num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2
    size.width = hint.Valid() ? hint.width : buffer.Pitch();
    size.height = hint.Valid() ? hint.height : buffer.Height();
    size.depth = 1;
    // Take the pitch from the registers rather than from size.width: with a valid hint the width
    // is the hinted extent, not the row pitch. This mirrors the color buffer constructor.
    pitch = buffer.Pitch();
    resources.layers = num_slices;
    meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
    usage.depth_target = true;

    guest_address = buffer.Address();
    const auto depth_slice_sz = buffer.GetDepthSliceSize();
    guest_size_bytes = depth_slice_sz * num_slices;
    mips_layout.emplace_back(0, depth_slice_sz);
}
// Describes a sampled/storage texture from its image descriptor and computes the guest memory
// layout of its mip chain. For every mip level an {offset, size} pair is appended to mips_layout;
// both values refer to a single layer. guest_size_bytes ends up as the sum of all mip sizes
// multiplied by the layer count.
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
    is_tiled = image.IsTiled();
    tiling_mode = image.GetTilingMode();
    pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
    type = ConvertImageType(image.GetType());
    is_cube = image.GetType() == AmdGpu::ImageType::Cube;
    is_volume = image.GetType() == AmdGpu::ImageType::Color3D;
    // The descriptor stores the extents minus one.
    size.width = image.width + 1;
    size.height = image.height + 1;
    size.depth = is_volume ? image.depth + 1 : 1;
    pitch = image.Pitch();
    resources.levels = image.NumLevels();
    resources.layers = image.NumLayers();
    usage.texture = true;

    guest_address = image.Address();

    mips_layout.reserve(resources.levels);
    const auto num_bits = NumBits(image.GetDataFmt());
    const auto is_block = IsBlockCoded();
    const auto is_pow2 = image.pow2pad;

    guest_size_bytes = 0;
    for (auto mip = 0u; mip < resources.levels; ++mip) {
        auto bpp = num_bits;
        // The mip width is derived from the pitch, not from size.width.
        auto mip_w = pitch >> mip;
        auto mip_h = size.height >> mip;
        if (is_block) {
            // Block coded formats are measured in 4x4 blocks: the extents become block counts
            // and the element size grows by the 16 texels a block covers.
            mip_w = (mip_w + 3) / 4;
            mip_h = (mip_h + 3) / 4;
            bpp *= 16;
        }
        // Small mips never shrink below one element per dimension.
        mip_w = std::max(mip_w, 1u);
        mip_h = std::max(mip_h, 1u);
        auto mip_d = std::max(size.depth >> mip, 1u);

        if (is_pow2) {
            // pow2pad: every dimension is rounded up to the next power of two.
            mip_w = std::bit_ceil(mip_w);
            mip_h = std::bit_ceil(mip_h);
            mip_d = std::bit_ceil(mip_d);
        }

        // num_samples is not assigned by this constructor, so the member's current value is
        // passed to the size helpers below.
        size_t mip_size = 0;
        switch (tiling_mode) {
        case AmdGpu::TilingMode::Display_Linear: {
            ASSERT(!is_cube);
            mip_size = ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples);
            break;
        }
        case AmdGpu::TilingMode::Texture_MicroTiled: {
            mip_size = ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples);
            break;
        }
        case AmdGpu::TilingMode::Display_MacroTiled:
        case AmdGpu::TilingMode::Texture_MacroTiled:
        case AmdGpu::TilingMode::Depth_MacroTiled: {
            // The macro tile table only covers single-sampled surfaces with up to 64 bits per
            // element, and block coded or cube images are not handled on this path.
            ASSERT(!is_cube && !is_block);
            ASSERT(num_samples == 1);
            ASSERT(num_bits <= 64);
            mip_size = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);
            break;
        }
        default: {
            UNREACHABLE();
        }
        }
        // The 2D slice size is scaled by the mip depth.
        mip_size *= mip_d;

        // The offset of this mip is the accumulated size of all previous mips.
        // NOTE(review): mip_size is a size_t while mips_layout and guest_size_bytes hold u32
        // values, so the value is narrowed here - confirm sizes always fit into 32 bits.
        mips_layout.emplace_back(guest_size_bytes, mip_size);
        guest_size_bytes += mip_size;
    }
    // Every layer carries a full mip chain.
    guest_size_bytes *= resources.layers;
}
} // namespace VideoCore

View file

@ -0,0 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/texture_cache/types.h"
namespace VideoCore {
/// Description of an image as seen by the texture cache: format, extent, tiling and the guest
/// memory range (address, size and per-mip layout) it occupies.
struct ImageInfo {
    ImageInfo() = default;
    /// Describes a video-out buffer located at `cpu_address`.
    ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept;
    /// Describes a color render target; a valid `hint` overrides the image extent.
    ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
              const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
    /// Describes a depth target with `num_slices` layers; a valid `hint` overrides the extent.
    ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
              const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
    /// Describes a texture from its image descriptor, including the layout of its mip chain.
    // NOTE(review): this single-argument constructor is not explicit, so an AmdGpu::Image converts
    // to ImageInfo implicitly - confirm that this is intended.
    ImageInfo(const AmdGpu::Image& image) noexcept;

    /// Returns true when the tiling mode is anything other than Display_Linear.
    // NOTE(review): this is derived from tiling_mode and is independent of the is_tiled member
    // below; the two are set separately by the constructors.
    bool IsTiled() const {
        return tiling_mode != AmdGpu::TilingMode::Display_Linear;
    }

    bool IsBlockCoded() const;
    bool IsPacked() const;
    bool IsDepthStencil() const;

    // Addresses of the metadata surfaces attached to the image; zero when not recorded.
    struct {
        VAddr cmask_addr;
        VAddr fmask_addr;
        VAddr htile_addr;
    } meta_info{};

    struct {
        u32 texture : 1;
        u32 storage : 1;
        u32 render_target : 1;
        u32 depth_target : 1;
        u32 stencil : 1;
        u32 vo_buffer : 1;
    } usage{}; // Usage data tracked during image lifetime

    bool is_cube = false;   // Set when the image descriptor type is Cube.
    bool is_volume = false; // Set when the image descriptor type is Color3D.
    bool is_tiled = false;
    bool is_read_only = false;
    vk::Format pixel_format = vk::Format::eUndefined;
    vk::ImageType type = vk::ImageType::e1D;
    SubresourceExtent resources; // Mip level and layer counts.
    Extent3D size{1, 1, 1};
    u32 num_samples = 1;
    u32 pitch = 0;
    AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
    // One {offset, size} pair per mip level, in bytes and relative to guest_address.
    // The values describe a single layer/slice.
    std::vector<std::pair<u32, u32>> mips_layout;
    VAddr guest_address{0};  // Start of the image data in guest memory.
    u32 guest_size_bytes{0}; // Size of the whole image (all mips and layers) in guest memory.
};
} // namespace VideoCore

View file

@ -89,7 +89,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
ImageInfo info;
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
info.type = vk::ImageType::e2D;
const ImageId null_id = slot_images.insert(instance, scheduler, info, 0);
const ImageId null_id = slot_images.insert(instance, scheduler, info);
ASSERT(null_id.index == 0);
ImageViewInfo view_info;
@ -112,26 +112,27 @@ void TextureCache::OnCpuWrite(VAddr address) {
});
}
ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) {
ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
std::unique_lock lock{m_page_table};
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
// Address and width must match.
if (image.cpu_addr != cpu_address || image.info.size.width != info.size.width) {
return;
}
if (info.IsDepthStencil() != image.info.IsDepthStencil() &&
info.pixel_format != vk::Format::eR32Sfloat) {
return;
}
image_ids.push_back(image_id);
});
ForEachImageInRegion(
info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
// Address and width must match.
if (image.cpu_addr != info.guest_address || image.info.size.width != info.size.width) {
return;
}
if (info.IsDepthStencil() != image.info.IsDepthStencil() &&
info.pixel_format != vk::Format::eR32Sfloat) {
return;
}
image_ids.push_back(image_id);
});
ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
ImageId image_id{};
if (image_ids.empty()) {
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
image_id = slot_images.insert(instance, scheduler, info);
RegisterImage(image_id);
} else {
image_id = image_ids[0];
@ -169,9 +170,9 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
return slot_image_views[view_id];
}
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) {
const ImageInfo info{desc};
const ImageId image_id = FindImage(info, desc.Address());
const ImageId image_id = FindImage(info);
Image& image = slot_images[image_id];
auto& usage = image.info.usage;
@ -190,10 +191,10 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
return RegisterImageView(image_id, view_info);
}
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
const ImageId image_id = FindImage(info, buffer.Address());
const ImageId image_id = FindImage(info);
Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified;
@ -207,11 +208,12 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
return RegisterImageView(image_id, view_info);
}
ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_enabled) {
const ImageInfo info{buffer, htile_address, hint};
const ImageId image_id = FindImage(info, buffer.Address(), false);
ImageView& TextureCache::FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
u32 num_slices, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_enabled) {
const ImageInfo info{buffer, num_slices, htile_address, hint};
const ImageId image_id = FindImage(info, false);
Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified;
@ -244,21 +246,24 @@ void TextureCache::RefreshImage(Image& image) {
return;
}
ASSERT(image.info.resources.levels == image.info.mips_layout.size());
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = image.info.size.width >> m;
const u32 height = image.info.size.height >> m;
const u32 map_size = width * height * image.info.resources.layers;
const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth = image.info.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
const u32 map_size = image.info.mips_layout[m].second * image.info.resources.layers;
// Upload data to the staging buffer.
const auto [data, offset, _] = staging.Map(map_size, 16);
if (image.info.is_tiled) {
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
} else {
std::memcpy(data, image_data, map_size);
std::memcpy(data,
image_data + image.info.mips_layout[m].first * image.info.resources.layers,
map_size);
}
staging.Commit(map_size);
image_data += map_size;
// Copy to the image.
const vk::BufferImageCopy image_copy = {
@ -272,7 +277,7 @@ void TextureCache::RefreshImage(Image& image) {
.layerCount = u32(image.info.resources.layers),
},
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, 1},
.imageExtent = {width, height, depth},
};
scheduler.EndRendering();

View file

@ -47,20 +47,21 @@ public:
/// Invalidates any image in the logical page range.
void OnCpuWrite(VAddr address);
/// Retrieves the image handle of the image with the provided attributes and address.
[[nodiscard]] ImageId FindImage(const ImageInfo& info, VAddr cpu_address,
bool refresh_on_create = true);
/// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
[[nodiscard]] ImageView& FindTexture(const AmdGpu::Image& image, bool is_storage);
/// Retrieves the render target with specified properties
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
[[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_enabled);
[[nodiscard]] ImageView& FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
/// Retrieves the depth target with specified properties
[[nodiscard]] ImageView& FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
u32 num_slices, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_enabled);
/// Reuploads image contents.
void RefreshImage(Image& image);

View file

@ -19,10 +19,6 @@
namespace VideoCore {
static u32 IntLog2(u32 i) {
return 31 - __builtin_clz(i | 1u);
}
class TileManager32 {
public:
u32 m_macro_tile_height = 0;
@ -81,8 +77,8 @@ public:
static u32 getBankIdx(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks,
u32 num_pipes) {
const u32 x_shift_offset = IntLog2(bank_width * num_pipes);
const u32 y_shift_offset = IntLog2(bank_height);
const u32 x_shift_offset = std::bit_width(bank_width * num_pipes) - 1;
const u32 y_shift_offset = std::bit_width(bank_height) - 1;
const u32 xs = x >> x_shift_offset;
const u32 ys = y >> y_shift_offset;
u32 bank = 0;
@ -210,8 +206,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
const DetilerContext* TileManager::GetDetiler(const Image& image) const {
const auto format = DemoteImageFormatForDetiling(image.info.pixel_format);
if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled ||
image.info.tiling_mode == AmdGpu::TilingMode::Depth_MicroTiled) {
if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled) {
switch (format) {
case vk::Format::eR8Uint:
return &detilers[DetilerType::Micro8x1];