Use spans over guest memory where possible instead of copying data.
This commit is contained in:
parent
95ceae40e6
commit
6f7cb69c94
22 changed files with 462 additions and 233 deletions
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "common/alignment.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
|
@ -1022,19 +1023,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
|
|||
runtime.AccelerateImageUpload(image, staging, uploads);
|
||||
return;
|
||||
}
|
||||
const size_t guest_size_bytes = image.guest_size_bytes;
|
||||
swizzle_data_buffer.resize_destructive(guest_size_bytes);
|
||||
gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
|
||||
|
||||
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
|
||||
*gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
|
||||
|
||||
if (True(image.flags & ImageFlagBits::Converted)) {
|
||||
unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
|
||||
auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer,
|
||||
unswizzle_data_buffer);
|
||||
auto copies =
|
||||
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
|
||||
ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
} else {
|
||||
const auto copies =
|
||||
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span);
|
||||
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
|
||||
image.UploadMemory(staging, copies);
|
||||
}
|
||||
}
|
||||
|
@ -1227,11 +1228,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
|
|||
decode->image_id = image_id;
|
||||
async_decodes.push_back(std::move(decode));
|
||||
|
||||
Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes);
|
||||
const size_t guest_size_bytes = image.guest_size_bytes;
|
||||
swizzle_data_buffer.resize_destructive(guest_size_bytes);
|
||||
gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
|
||||
auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
|
||||
static Common::ScratchBuffer<u8> local_unswizzle_data_buffer;
|
||||
local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
|
||||
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
|
||||
*gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
|
||||
|
||||
auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
|
||||
local_unswizzle_data_buffer);
|
||||
const size_t out_size = MapSizeBytes(image);
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "common/div_ceil.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/compatible_formats.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
|||
tile_size.height, info.tile_width_spacing);
|
||||
const size_t subresource_size = sizes[level];
|
||||
|
||||
tmp_buffer.resize_destructive(subresource_size);
|
||||
const std::span<u8> dst(tmp_buffer);
|
||||
|
||||
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||
const std::span<const u8> src = input.subspan(host_offset);
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
|
||||
{
|
||||
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite>
|
||||
dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);
|
||||
|
||||
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
|
||||
num_tiles.depth, block.height, block.depth);
|
||||
|
||||
gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
|
||||
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
|
||||
num_tiles.depth, block.height, block.depth);
|
||||
}
|
||||
|
||||
host_offset += host_bytes_per_layer;
|
||||
guest_offset += layer_stride;
|
||||
|
@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
|
|||
const Extent3D size = info.size;
|
||||
|
||||
if (info.type == ImageType::Linear) {
|
||||
ASSERT(output.size_bytes() >= guest_size_bytes);
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
|
||||
|
||||
ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
|
||||
|
@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
|
|||
return copies;
|
||||
}
|
||||
|
||||
/// Reads an image's guest bytes into a host span and describes the resulting copy.
///
/// Calls gpu_memory.ReadBlockUnsafe to read image.guest_size_bytes bytes starting at
/// gpu_addr into output.data(), then returns a BufferCopy spanning that same byte count.
///
/// @param gpu_memory Memory manager the bytes are read from.
/// @param gpu_addr   Address (GPUVAddr) at which the read starts.
/// @param image      Supplies guest_size_bytes, used both as the read length and the copy size.
/// @param output     Destination span; only its data() pointer is used here.
/// @return BufferCopy with src_offset = 0, dst_offset = 0 and size = image.guest_size_bytes.
BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageBase& image, std::span<u8> output) {
|
||||
// NOTE(review): output's size is never checked against image.guest_size_bytes in this
// function; presumably callers must pass a span at least that large — confirm.
gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
|
||||
// Both offsets are zero: the returned copy starts at the beginning of source and destination.
return BufferCopy{
|
||||
.src_offset = 0,
|
||||
.dst_offset = 0,
|
||||
.size = image.guest_size_bytes,
|
||||
};
|
||||
}
|
||||
|
||||
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
|
||||
std::span<BufferImageCopy> copies) {
|
||||
u32 output_offset = 0;
|
||||
|
|
|
@ -66,9 +66,6 @@ struct OverlapResult {
|
|||
Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const u8> input, std::span<u8> output);
|
||||
|
||||
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageBase& image, std::span<u8> output);
|
||||
|
||||
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
|
||||
std::span<BufferImageCopy> copies);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue