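Remove heap memory allocations in some hot paths (replace std::vector with boost::container::small_vector, which keeps small element counts in inline storage)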
This commit is contained in:
parent
e3122c5b46
commit
5da70f7197
84 changed files with 501 additions and 458 deletions
|
@ -6,6 +6,7 @@
|
|||
#include <array>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
@ -108,8 +109,8 @@ struct ImageBase {
|
|||
std::vector<ImageViewInfo> image_view_infos;
|
||||
std::vector<ImageViewId> image_view_ids;
|
||||
|
||||
std::vector<u32> slice_offsets;
|
||||
std::vector<SubresourceBase> slice_subresources;
|
||||
boost::container::small_vector<u32, 16> slice_offsets;
|
||||
boost::container::small_vector<SubresourceBase, 16> slice_subresources;
|
||||
|
||||
std::vector<AliasedImage> aliased_images;
|
||||
std::vector<ImageId> overlapping_images;
|
||||
|
|
|
@ -526,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
|
|||
|
||||
template <class P>
|
||||
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||
std::vector<ImageId> images;
|
||||
boost::container::small_vector<ImageId, 16> images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
|
||||
if (!image.IsSafeDownload()) {
|
||||
return;
|
||||
|
@ -579,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
|
|||
|
||||
template <class P>
|
||||
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
std::vector<ImageId> deleted_images;
|
||||
boost::container::small_vector<ImageId, 16> deleted_images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
|
@ -593,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
|||
|
||||
template <class P>
|
||||
void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
|
||||
std::vector<ImageId> deleted_images;
|
||||
boost::container::small_vector<ImageId, 16> deleted_images;
|
||||
ForEachImageInRegionGPU(as_id, gpu_addr, size,
|
||||
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
|
@ -1101,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|||
const bool native_bgr = runtime.HasNativeBgr();
|
||||
const bool flexible_formats = True(options & RelaxedOptions::Format);
|
||||
ImageId image_id{};
|
||||
boost::container::small_vector<ImageId, 1> image_ids;
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
|
||||
return false;
|
||||
|
@ -1622,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
|
|||
}
|
||||
}
|
||||
ImageId image_id{};
|
||||
boost::container::small_vector<ImageId, 1> image_ids;
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
|
||||
return false;
|
||||
|
@ -1942,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
|||
image.map_view_id = map_id;
|
||||
return;
|
||||
}
|
||||
std::vector<ImageViewId> sparse_maps{};
|
||||
boost::container::small_vector<ImageViewId, 16> sparse_maps;
|
||||
ForEachSparseSegment(
|
||||
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
|
||||
|
@ -2217,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
|
|||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
|
||||
boost::container::small_vector<const AliasedImage*, 8> aliased_images;
|
||||
Image& image = slot_images[image_id];
|
||||
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
|
||||
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
|
||||
|
|
|
@ -56,7 +56,7 @@ struct ImageViewInOut {
|
|||
struct AsyncDecodeContext {
|
||||
ImageId image_id;
|
||||
Common::ScratchBuffer<u8> decoded_data;
|
||||
std::vector<BufferImageCopy> copies;
|
||||
boost::container::small_vector<BufferImageCopy, 16> copies;
|
||||
std::mutex mutex;
|
||||
std::atomic_bool complete;
|
||||
};
|
||||
|
@ -429,7 +429,7 @@ private:
|
|||
|
||||
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
|
||||
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
|
||||
std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
|
||||
|
||||
VAddr virtual_invalid_space{};
|
||||
|
||||
|
|
|
@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
|
|||
|
||||
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
|
||||
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
|
||||
const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
|
||||
const auto slice_offsets = CalculateSliceOffsets(new_info);
|
||||
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
|
||||
const auto it = std::ranges::find(slice_offsets, diff);
|
||||
if (it == slice_offsets.end()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const std::vector subresources = CalculateSliceSubresources(new_info);
|
||||
const auto subresources = CalculateSliceSubresources(new_info);
|
||||
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
|
||||
const ImageInfo& info = overlap.info;
|
||||
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
|
||||
|
@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
|
|||
return sizes;
|
||||
}
|
||||
|
||||
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
||||
boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
|
||||
ASSERT(info.type == ImageType::e3D);
|
||||
std::vector<u32> offsets;
|
||||
boost::container::small_vector<u32, 16> offsets;
|
||||
offsets.reserve(NumSlices(info));
|
||||
|
||||
const LevelInfo level_info = MakeLevelInfo(info);
|
||||
|
@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
|||
return offsets;
|
||||
}
|
||||
|
||||
std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
|
||||
boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
|
||||
const ImageInfo& info) {
|
||||
ASSERT(info.type == ImageType::e3D);
|
||||
std::vector<SubresourceBase> subresources;
|
||||
boost::container::small_vector<SubresourceBase, 16> subresources;
|
||||
subresources.reserve(NumSlices(info));
|
||||
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||
const s32 depth = AdjustMipSize(info.size.depth, level);
|
||||
|
@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
|
||||
SubresourceBase base, u32 up_scale, u32 down_shift) {
|
||||
boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
|
||||
const ImageInfo& src,
|
||||
SubresourceBase base,
|
||||
u32 up_scale, u32 down_shift) {
|
||||
ASSERT(dst.resources.levels >= src.resources.levels);
|
||||
|
||||
const bool is_dst_3d = dst.type == ImageType::e3D;
|
||||
|
@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
|
|||
ASSERT(src.resources.levels == 1);
|
||||
}
|
||||
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
|
||||
std::vector<ImageCopy> copies;
|
||||
boost::container::small_vector<ImageCopy, 16> copies;
|
||||
copies.reserve(src.resources.levels);
|
||||
for (s32 level = 0; level < src.resources.levels; ++level) {
|
||||
ImageCopy& copy = copies.emplace_back();
|
||||
|
@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
|
|||
return copies;
|
||||
}
|
||||
|
||||
std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale,
|
||||
u32 down_shift) {
|
||||
std::vector<ImageCopy> copies;
|
||||
boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
|
||||
u32 up_scale,
|
||||
u32 down_shift) {
|
||||
boost::container::small_vector<ImageCopy, 16> copies;
|
||||
copies.reserve(src.resources.levels);
|
||||
const bool is_3d = src.type == ImageType::e3D;
|
||||
for (s32 level = 0; level < src.resources.levels; ++level) {
|
||||
|
@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
|
|||
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
|
||||
}
|
||||
|
||||
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageInfo& info, std::span<const u8> input,
|
||||
std::span<u8> output) {
|
||||
boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||
GPUVAddr gpu_addr,
|
||||
const ImageInfo& info,
|
||||
std::span<const u8> input,
|
||||
std::span<u8> output) {
|
||||
const size_t guest_size_bytes = input.size_bytes();
|
||||
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
|
||||
const Extent3D size = info.size;
|
||||
|
@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
|
|||
info.tile_width_spacing);
|
||||
size_t guest_offset = 0;
|
||||
u32 host_offset = 0;
|
||||
std::vector<BufferImageCopy> copies(num_levels);
|
||||
boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
|
||||
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
|
@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
|
||||
boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
|
||||
const Extent3D size = info.size;
|
||||
const u32 bytes_per_block = BytesPerBlock(info.format);
|
||||
if (info.type == ImageType::Linear) {
|
||||
|
@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
|
|||
|
||||
u32 host_offset = 0;
|
||||
|
||||
std::vector<BufferImageCopy> copies(num_levels);
|
||||
boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
|
||||
|
@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
|
|||
return AdjustMipBlockSize(num_tiles, level_info.block, level);
|
||||
}
|
||||
|
||||
std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
|
||||
boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
|
||||
const Extent2D tile_size = DefaultBlockSize(info.format);
|
||||
if (info.type == ImageType::Linear) {
|
||||
return std::vector{SwizzleParameters{
|
||||
return {SwizzleParameters{
|
||||
.num_tiles = AdjustTileSize(info.size, tile_size),
|
||||
.block = {},
|
||||
.buffer_offset = 0,
|
||||
|
@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
|
|||
const s32 num_levels = info.resources.levels;
|
||||
|
||||
u32 guest_offset = 0;
|
||||
std::vector<SwizzleParameters> params(num_levels);
|
||||
boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
|
@ -40,9 +41,10 @@ struct OverlapResult {
|
|||
|
||||
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
|
||||
|
||||
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
|
||||
|
||||
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
|
||||
const ImageInfo& info);
|
||||
|
||||
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
|
||||
|
||||
|
@ -51,21 +53,18 @@ struct OverlapResult {
|
|||
|
||||
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
|
||||
|
||||
[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
|
||||
const ImageInfo& src,
|
||||
SubresourceBase base, u32 up_scale = 1,
|
||||
u32 down_shift = 0);
|
||||
[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
|
||||
const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
|
||||
u32 down_shift = 0);
|
||||
|
||||
[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src,
|
||||
u32 up_scale = 1,
|
||||
u32 down_shift = 0);
|
||||
[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
|
||||
const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
|
||||
|
||||
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
|
||||
|
||||
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||
GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const u8> input,
|
||||
std::span<u8> output);
|
||||
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
|
||||
Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const u8> input, std::span<u8> output);
|
||||
|
||||
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageBase& image, std::span<u8> output);
|
||||
|
@ -73,13 +72,15 @@ struct OverlapResult {
|
|||
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
|
||||
std::span<BufferImageCopy> copies);
|
||||
|
||||
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
|
||||
const ImageInfo& info);
|
||||
|
||||
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
|
||||
|
||||
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
|
||||
|
||||
[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
|
||||
const ImageInfo& info);
|
||||
|
||||
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const BufferImageCopy> copies, std::span<const u8> memory,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue