Use spans over guest memory where possible instead of copying data.

Author: Kelebek1   Date: 2023-05-29 00:35:51 +01:00
parent 95ceae40e6
commit 6f7cb69c94
22 changed files with 462 additions and 233 deletions
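
The pattern the commit applies can be summarized as: ask the memory manager for a direct host span over the guest range, and only fall back to copying into a scratch buffer when the range is not host-contiguous; the scoped read-write variants flush that scratch copy back to guest memory when they go out of scope. Below is a minimal, self-contained sketch of that idea. The names used here (FakeGuestMemory, ScopedGuestSpan) are hypothetical stand-ins for illustration only, not yuzu's actual Core::Memory::CpuGuestMemory/GpuGuestMemoryScoped helpers from core/memory.h, which additionally carry access flags such as UnsafeRead and SafeReadCachedWrite.

// Minimal sketch of the "span over guest memory" pattern on a toy backing
// store. FakeGuestMemory and ScopedGuestSpan are hypothetical names; yuzu's
// real helpers live in core/memory.h and are used throughout the diff below.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <span>
#include <vector>

using u8 = std::uint8_t;
using GuestAddr = std::size_t;

// Toy guest memory: one flat allocation, so every in-bounds range is
// host-contiguous. A real implementation would walk page tables and could
// return nullptr for ranges that cross non-contiguous pages.
class FakeGuestMemory {
public:
    explicit FakeGuestMemory(std::size_t size) : backing(size) {}

    u8* GetSpan(GuestAddr addr, std::size_t size) {
        return addr + size <= backing.size() ? backing.data() + addr : nullptr;
    }
    void ReadBlock(GuestAddr addr, void* dest, std::size_t size) {
        std::memcpy(dest, backing.data() + addr, size);
    }
    void WriteBlock(GuestAddr addr, const void* src, std::size_t size) {
        std::memcpy(backing.data() + addr, src, size);
    }

private:
    std::vector<u8> backing;
};

// Read-write view over guest memory: a zero-copy span when the range is
// contiguous, otherwise a scratch copy that is written back on destruction.
class ScopedGuestSpan {
public:
    ScopedGuestSpan(FakeGuestMemory& mem_, GuestAddr addr_, std::size_t size_)
        : mem{mem_}, addr{addr_}, size{size_} {
        if (u8* direct = mem.GetSpan(addr, size)) {
            view = std::span<u8>{direct, size}; // fast path: no copy at all
        } else {
            scratch.resize(size); // fallback: bounce buffer
            mem.ReadBlock(addr, scratch.data(), size);
            view = std::span<u8>{scratch.data(), size};
        }
    }
    ~ScopedGuestSpan() {
        if (!scratch.empty()) {
            mem.WriteBlock(addr, scratch.data(), size); // flush the copy back
        }
    }
    std::span<u8> Span() { return view; }

private:
    FakeGuestMemory& mem;
    GuestAddr addr;
    std::size_t size;
    std::vector<u8> scratch;
    std::span<u8> view;
};

int main() {
    FakeGuestMemory guest(64);
    {
        ScopedGuestSpan dst(guest, 16, 4);
        dst.Span()[0] = 0xAB; // lands in guest memory directly, or via the flush
    }
    u8 value{};
    guest.ReadBlock(16, &value, 1);
    std::cout << std::hex << static_cast<int>(value) << '\n'; // prints "ab"
}

This is why most call sites in the hunks below shrink to constructing a single wrapper object: the read, the optional bounce-buffer copy, and the write-back are handled by the wrapper's constructor and destructor, and the swizzle/copy routines simply receive it as a span.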


@@ -234,9 +234,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     if (has_new_downloads) {
         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
-    tmp_buffer.resize_destructive(amount);
-    cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
-    cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
+    Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp(
+        cpu_memory, *cpu_src_address, amount, &tmp_buffer);
+    tmp.SetAddressAndSize(*cpu_dest_address, amount);
     return true;
 }


@@ -5,6 +5,7 @@
 #include "common/microprofile.h"
 #include "common/settings.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
@@ -12,6 +13,8 @@
 namespace Tegra {
 
+constexpr u32 MacroRegistersStart = 0xE00;
+
 DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
                      Control::ChannelState& channel_state_)
     : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
@@ -74,25 +77,16 @@ bool DmaPusher::Step() {
         }
 
         // Push buffer non-empty, read a word
-        command_headers.resize_destructive(command_list_header.size);
-        constexpr u32 MacroRegistersStart = 0xE00;
-        if (dma_state.method < MacroRegistersStart) {
-            if (Settings::IsGPULevelHigh()) {
-                memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
-                                         command_list_header.size * sizeof(u32));
-            } else {
-                memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
-                                               command_list_header.size * sizeof(u32));
-            }
-        } else {
-            const size_t copy_size = command_list_header.size * sizeof(u32);
+        if (dma_state.method >= MacroRegistersStart) {
             if (subchannels[dma_state.subchannel]) {
-                subchannels[dma_state.subchannel]->current_dirty =
-                    memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
+                subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(
+                    dma_state.dma_get, command_list_header.size * sizeof(u32));
             }
-            memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
         }
-        ProcessCommands(command_headers);
+        Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
+                                     Core::Memory::GuestMemoryFlags::UnsafeRead>
+            headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers);
+        ProcessCommands(headers);
     }
 
     return true;


@@ -5,6 +5,7 @@
 #include "common/algorithm.h"
 #include "common/assert.h"
+#include "core/memory.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
@@ -46,15 +47,11 @@ void State::ProcessData(const u32* data, size_t num_data) {
 void State::ProcessData(std::span<const u8> read_buffer) {
     const GPUVAddr address{regs.dest.Address()};
     if (is_linear) {
-        if (regs.line_count == 1) {
-            rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
-        } else {
-            for (size_t line = 0; line < regs.line_count; ++line) {
-                const GPUVAddr dest_line = address + line * regs.dest.pitch;
-                std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in,
-                                           regs.line_length_in);
-                rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
-            }
-        }
+        for (size_t line = 0; line < regs.line_count; ++line) {
+            const GPUVAddr dest_line = address + line * regs.dest.pitch;
+            std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in,
+                                       regs.line_length_in);
+            rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
+        }
     } else {
         u32 width = regs.dest.width;
@@ -70,13 +67,14 @@ void State::ProcessData(std::span<const u8> read_buffer) {
         const std::size_t dst_size = Tegra::Texture::CalculateSize(
             true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
             regs.dest.BlockHeight(), regs.dest.BlockDepth());
-        tmp_buffer.resize_destructive(dst_size);
-        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-        Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
-                                       regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
-                                       x_elements, regs.line_count, regs.dest.BlockHeight(),
+        Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+            tmp(memory_manager, address, dst_size, &tmp_buffer);
+        Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height,
+                                       regs.dest.depth, x_offset, regs.dest.y, x_elements,
+                                       regs.line_count, regs.dest.BlockHeight(),
                                        regs.dest.BlockDepth(), regs.line_length_in);
-        memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
     }
 }


@@ -84,7 +84,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
     Texture::TICEntry tic_entry;
     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
     return tic_entry;
 }


@@ -9,6 +9,7 @@
 #include "common/settings.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/memory.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/draw_manager.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -679,17 +680,14 @@ void Maxwell3D::ProcessCBData(u32 value) {
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     const GPUVAddr tic_address_gpu{regs.tex_header.Address() +
                                    tic_index * sizeof(Texture::TICEntry)};
     Texture::TICEntry tic_entry;
     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
     return tic_entry;
 }
 
 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
     const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() +
                                    tsc_index * sizeof(Texture::TSCEntry)};
     Texture::TSCEntry tsc_entry;
     memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;


@@ -7,6 +7,7 @@
 #include "common/microprofile.h"
 #include "common/settings.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/memory_manager.h"
@@ -130,11 +131,12 @@ void MaxwellDMA::Launch() {
             UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
-            read_buffer.resize_destructive(16);
             for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
-                memory_manager.ReadBlock(
-                    convert_linear_2_blocklinear_addr(regs.offset_in + offset),
-                    read_buffer.data(), read_buffer.size());
-                memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
-                                                read_buffer.size());
+                Core::Memory::GpuGuestMemoryScoped<
+                    u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+                    tmp_write_buffer(memory_manager,
+                                     convert_linear_2_blocklinear_addr(regs.offset_in + offset),
+                                     16, &read_buffer);
+                tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16);
             }
         } else if (is_src_pitch && !is_dst_pitch) {
             UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -142,20 +144,19 @@ void MaxwellDMA::Launch() {
             UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
-            read_buffer.resize_destructive(16);
             for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
-                memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(),
-                                         read_buffer.size());
-                memory_manager.WriteBlockCached(
-                    convert_linear_2_blocklinear_addr(regs.offset_out + offset),
-                    read_buffer.data(), read_buffer.size());
+                Core::Memory::GpuGuestMemoryScoped<
+                    u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+                    tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer);
+                tmp_write_buffer.SetAddressAndSize(
+                    convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16);
             }
         } else {
            if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
-                read_buffer.resize_destructive(regs.line_length_in);
-                memory_manager.ReadBlock(regs.offset_in, read_buffer.data(),
-                                         regs.line_length_in,
-                                         VideoCommon::CacheType::NoBufferCache);
-                memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
-                                                regs.line_length_in);
+                Core::Memory::GpuGuestMemoryScoped<
+                    u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+                    tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in,
+                                     &read_buffer);
+                tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in);
            }
        }
    }
@@ -222,17 +223,15 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
     const size_t dst_size = dst_operand.pitch * regs.line_count;
-    read_buffer.resize_destructive(src_size);
-    write_buffer.resize_destructive(dst_size);
-    memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size);
-    memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size);
+    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
+        memory_manager, src_operand.address, src_size, &read_buffer);
+    Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+        tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer);
-    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
-                     src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                     dst_operand.pitch);
-    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
+    UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth,
+                     x_offset, src_params.origin.y, x_elements, regs.line_count, block_height,
+                     block_depth, dst_operand.pitch);
 }
 
 void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -287,18 +286,17 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
     const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
-    read_buffer.resize_destructive(src_size);
-    write_buffer.resize_destructive(dst_size);
+    GPUVAddr src_addr = regs.offset_in;
+    GPUVAddr dst_addr = regs.offset_out;
+    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
+        memory_manager, src_addr, src_size, &read_buffer);
+    Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+        tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer);
-    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
-    memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
-    // If the input is linear and the output is tiled, swizzle the input and copy it over.
-    SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
-                   dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                   regs.pitch_in);
-    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
+    // If the input is linear and the output is tiled, swizzle the input and copy it over.
+    SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth,
+                   x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height,
+                   block_depth, regs.pitch_in);
 }
 
 void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -342,23 +340,20 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
     const u32 pitch = x_elements * bytes_per_pixel;
     const size_t mid_buffer_size = pitch * regs.line_count;
-    read_buffer.resize_destructive(src_size);
-    write_buffer.resize_destructive(dst_size);
     intermediate_buffer.resize_destructive(mid_buffer_size);
-    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
-    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
+        memory_manager, regs.offset_in, src_size, &read_buffer);
+    Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
+        tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer);
-    UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height,
+    UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height,
                      src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count,
                      src.block_size.height, src.block_size.depth, pitch);
-    SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
+    SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
                    dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
                    dst.block_size.height, dst.block_size.depth, pitch);
-    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::ReleaseSemaphore() {


@@ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
     const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
     const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
     const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
-    impl->tmp_buffer.resize_destructive(src_size);
-    memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
+    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer(
+        memory_manager, src.Address(), src_size, &impl->tmp_buffer);
 
     const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
     const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
     impl->src_buffer.resize_destructive(src_copy_size);
@@ -200,12 +200,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
     impl->dst_buffer.resize_destructive(dst_copy_size);
 
     if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
-        UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
-                         src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
-                         src_extent_y, src.block_height, src.block_depth,
-                         src_extent_x * src_bytes_per_pixel);
+        UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height,
+                         src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y,
+                         src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel);
     } else {
-        process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
+        process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
                              src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
     }
@@ -221,20 +220,18 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
     }
 
     const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
-    impl->tmp_buffer.resize_destructive(dst_size);
-    memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
+    Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite>
+        tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer);
 
     if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
-        SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
-                       dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
-                       dst_extent_y, dst.block_height, dst.block_depth,
-                       dst_extent_x * dst_bytes_per_pixel);
+        SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height,
+                       dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y,
+                       dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel);
     } else {
-        process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
+        process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y,
                              dst.pitch, config.dst_x0, config.dst_y0,
                              static_cast<size_t>(dst_bytes_per_pixel));
     }
 
-    memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
     return true;
 }


@@ -10,13 +10,13 @@
 #include "core/device_memory.h"
 #include "core/hle/kernel/k_page_table.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/memory.h"
 #include "video_core/invalidation_accumulator.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 
 namespace Tegra {
+using Core::Memory::GuestMemoryFlags;
 
 std::atomic<size_t> MemoryManager::unique_identifier_generator{};
@@ -587,13 +587,10 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
 void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
                               VideoCommon::CacheType which) {
-    tmp_buffer.resize_destructive(size);
-    ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
-
-    // The output block must be flushed in case it has data modified from the GPU.
-    // Fixes NPC geometry in Zombie Panic in Wonderland DX
+    Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data(
+        *this, gpu_src_addr, size);
+    data.SetAddressAndSize(gpu_dest_addr, size);
     FlushRegion(gpu_dest_addr, size, which);
-    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which);
 }
 
 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
@@ -758,4 +755,23 @@ void MemoryManager::FlushCaching() {
     accumulator->Clear();
 }
 
+const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const {
+    auto cpu_addr = GpuToCpuAddress(src_addr);
+    if (cpu_addr) {
+        return memory.GetSpan(*cpu_addr, size);
+    }
+    return nullptr;
+}
+
+u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) {
+    if (!IsContinuousRange(src_addr, size)) {
+        return nullptr;
+    }
+    auto cpu_addr = GpuToCpuAddress(src_addr);
+    if (cpu_addr) {
+        return memory.GetSpan(*cpu_addr, size);
+    }
+    return nullptr;
+}
+
 } // namespace Tegra


@@ -15,6 +15,7 @@
 #include "common/range_map.h"
 #include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
+#include "core/memory.h"
 #include "video_core/cache_types.h"
 #include "video_core/pte_kind.h"
@@ -62,6 +63,20 @@
     [[nodiscard]] u8* GetPointer(GPUVAddr addr);
     [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
 
+    template <typename T>
+    [[nodiscard]] T* GetPointer(GPUVAddr addr) {
+        const auto address{GpuToCpuAddress(addr)};
+        if (!address) {
+            return {};
+        }
+        return memory.GetPointer(*address);
+    }
+
+    template <typename T>
+    [[nodiscard]] const T* GetPointer(GPUVAddr addr) const {
+        return GetPointer<T*>(addr);
+    }
+
     /**
      * ReadBlock and WriteBlock are full read and write operations over virtual
      * GPU Memory. It's important to use these when GPU memory may not be continuous
@@ -139,6 +154,9 @@
     void FlushCaching();
 
+    const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const;
+    u8* GetSpan(const GPUVAddr src_addr, const std::size_t size);
+
 private:
     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,


@@ -8,6 +8,7 @@
 #include "common/alignment.h"
 #include "common/settings.h"
+#include "core/memory.h"
 #include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -1022,19 +1023,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
         runtime.AccelerateImageUpload(image, staging, uploads);
         return;
     }
-    const size_t guest_size_bytes = image.guest_size_bytes;
-    swizzle_data_buffer.resize_destructive(guest_size_bytes);
-    gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
+    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
+        *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
 
     if (True(image.flags & ImageFlagBits::Converted)) {
         unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
-        auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer,
-                                     unswizzle_data_buffer);
+        auto copies =
+            UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
         image.UploadMemory(staging, copies);
     } else {
         const auto copies =
-            UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span);
+            UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
         image.UploadMemory(staging, copies);
     }
 }
@@ -1227,11 +1228,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
     decode->image_id = image_id;
     async_decodes.push_back(std::move(decode));
 
-    Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes);
-    const size_t guest_size_bytes = image.guest_size_bytes;
-    swizzle_data_buffer.resize_destructive(guest_size_bytes);
-    gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
-    auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
+    static Common::ScratchBuffer<u8> local_unswizzle_data_buffer;
+    local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
+    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
+        *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
+    auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
                                  local_unswizzle_data_buffer);
     const size_t out_size = MapSizeBytes(image);


@@ -20,6 +20,7 @@
 #include "common/div_ceil.h"
 #include "common/scratch_buffer.h"
 #include "common/settings.h"
+#include "core/memory.h"
 #include "video_core/compatible_formats.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
@@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
                              tile_size.height, info.tile_width_spacing);
         const size_t subresource_size = sizes[level];
-        tmp_buffer.resize_destructive(subresource_size);
-        const std::span<u8> dst(tmp_buffer);
         for (s32 layer = 0; layer < info.resources.layers; ++layer) {
             const std::span<const u8> src = input.subspan(host_offset);
-            gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+            {
+                Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite>
+                    dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);
-            SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
-                           num_tiles.depth, block.height, block.depth);
-            gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+                SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
+                               num_tiles.depth, block.height, block.depth);
+            }
             host_offset += host_bytes_per_layer;
             guest_offset += layer_stride;
@@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
     const Extent3D size = info.size;
     if (info.type == ImageType::Linear) {
         ASSERT(output.size_bytes() >= guest_size_bytes);
+        gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
         ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
@@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
     return copies;
 }
 
-BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
-                            const ImageBase& image, std::span<u8> output) {
-    gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
-    return BufferCopy{
-        .src_offset = 0,
-        .dst_offset = 0,
-        .size = image.guest_size_bytes,
-    };
-}
-
 void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
                   std::span<BufferImageCopy> copies) {
     u32 output_offset = 0;


@@ -66,9 +66,6 @@ struct OverlapResult {
     Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
     std::span<const u8> input, std::span<u8> output);
 
-[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
-                                          const ImageBase& image, std::span<u8> output);
-
 void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
                   std::span<BufferImageCopy> copies);