Merge pull request #2400 from FernandoS27/corret-kepler-mem

Implement Kepler Memory on both Linear and BlockLinear.
This commit is contained in:
bunnei 2019-04-22 16:47:05 -04:00 committed by GitHub
commit 01100f8afd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 17 deletions

View file

@ -10,6 +10,7 @@
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): {
state.write_offset = 0;
ProcessExec();
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
ProcessData(method_call.argument);
ProcessData(method_call.argument, method_call.IsLastCall());
break;
}
}
}
void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
/// Prepares the upload state for a new inline transfer: sizes the staging buffer to
/// line_length_in * line_count bytes and rewinds the write cursor to the start.
void KeplerMemory::ProcessExec() {
    auto& upload = state;
    // Total payload size, as programmed through the line registers.
    upload.copy_size = regs.line_length_in * regs.line_count;
    upload.inner_buffer.resize(upload.copy_size);
    // Subsequent data words are staged from the beginning of the buffer.
    upload.write_offset = 0;
}
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might
// contain a dirty surface that will have to be written back to memory.
const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
memory_manager.Write<u32>(address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;
/// Stages one 32-bit word of inline upload data and, on the final call of the submission,
/// commits the whole staging buffer to the destination GPU address.
///
/// @param data         Payload word to append to the staging buffer.
/// @param is_last_call When true, the staged bytes are written out (linear or block-linear,
///                     depending on regs.exec.linear) and the 3D engine is told memory changed.
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
    // Never stage more than copy_size bytes in total; the last word may be only partially used.
    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);

    // Once the buffer is full (or when it is empty) there is nothing to copy. Skipping the copy
    // avoids subscripting the vector at size(), which is undefined behaviour and asserts in
    // checked standard library builds even for a zero-length memcpy.
    if (sub_copy_size != 0) {
        // Copy from the argument itself rather than re-reading the register file, so the
        // function does not depend on the caller having mirrored the word into regs first.
        std::memcpy(state.inner_buffer.data() + state.write_offset, &data, sub_copy_size);
        state.write_offset += sub_copy_size;
    }

    if (!is_last_call) {
        return;
    }

    const GPUVAddr address{regs.dest.Address()};
    if (regs.exec.linear != 0) {
        // Linear destination: the staged bytes go out as one contiguous block.
        memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
    } else {
        // Block-linear destination: only single-layer surfaces with unit block width/depth
        // are handled here.
        UNIMPLEMENTED_IF(regs.dest.z != 0);
        UNIMPLEMENTED_IF(regs.dest.depth != 1);
        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);

        const std::size_t dst_size = Tegra::Texture::CalculateSize(
            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);

        // Read-modify-write: fetch the current destination contents, patch in the staged
        // data, then write the whole region back.
        // NOTE(review): SwizzleKepler presumably places the linear staging data into the
        // block-linear image starting at (dest.x, dest.y) - confirm against decoders.h.
        std::vector<u8> tmp_buffer(dst_size);
        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
                                      regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
                                      state.inner_buffer.data(), tmp_buffer.data());
        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
    }
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
} // namespace Tegra::Engines

View file

@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@ -51,7 +52,11 @@ public:
u32 address_high;
u32 address_low;
u32 pitch;
u32 block_dimensions;
// Packed block dimensions. Each 4-bit field holds a shift amount; the BlockWidth(),
// BlockHeight() and BlockDepth() accessors expand it as 1U << value.
union {
// Bits [0, 4): shift amount for the block width.
BitField<0, 4, u32> block_width;
// Bits [4, 8): shift amount for the block height.
BitField<4, 4, u32> block_height;
// Bits [8, 12): shift amount for the block depth.
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
@ -63,6 +68,18 @@ public:
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
/// Returns the block width, expanding the shift amount stored in block_width to 2^n.
u32 BlockWidth() const {
    const u32 width_shift = block_width.Value();
    return 1U << width_shift;
}
/// Returns the block height, expanding the shift amount stored in block_height to 2^n.
u32 BlockHeight() const {
    const u32 height_shift = block_height.Value();
    return 1U << height_shift;
}
/// Returns the block depth, expanding the shift amount stored in block_depth to 2^n.
u32 BlockDepth() const {
    const u32 depth_shift = block_depth.Value();
    return 1U << depth_shift;
}
} dest;
struct {
@ -81,6 +98,8 @@ public:
// Bookkeeping for the inline upload currently in progress.
struct {
// Byte index into inner_buffer where the next data word is staged; reset by ProcessExec().
u32 write_offset = 0;
// Total number of bytes to upload (line_length_in * line_count), set by ProcessExec().
u32 copy_size = 0;
// Staging buffer, resized to copy_size bytes by ProcessExec() and flushed by ProcessData().
std::vector<u8> inner_buffer;
} state{};
private:
@ -88,7 +107,8 @@ private:
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
void ProcessData(u32 data);
void ProcessExec();
void ProcessData(u32 data, bool is_last_call);
};
#define ASSERT_REG_POSITION(field_name, position) \