Memory: Remove count of cached pages and add InvalidateRegion

In a future commit, the count of cached pages will be reintroduced in
the actual surface cache. Also adds an Invalidate only to the cache
which marks a region as invalid in order to try to avoid a costly flush
from 3ds memory
This commit is contained in:
James Rowe 2017-11-23 10:43:12 -07:00
parent c821c14908
commit 13606a6d0b
8 changed files with 50 additions and 54 deletions

View file

@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) {
u8* start = Memory::GetPhysicalPointer(start_addr);
u8* end = Memory::GetPhysicalPointer(end_addr);
// TODO: Consider always accelerating and returning vector of
// regions that the accelerated fill did not cover to
// reduce/eliminate the fill that the cpu has to do.
// This would also mean that the flush below is not needed.
// Fill should first flush all surfaces that touch but are
// not completely within the fill range.
// Then fill all completely covered surfaces, and return the
// regions that were between surfaces or within the touching
// ones for cpu to manually fill here.
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
return;
Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress());
Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress());
if (config.fill_24bit) {
// fill with 24-bit values
@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
static_cast<u32>(contiguous_output_size));
// Only need to flush output if it has a gap
const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion
: Memory::RasterizerInvalidateRegion;
FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
u32 remaining_input = input_width;
u32 remaining_output = output_width;
@ -446,16 +439,18 @@ inline void Write(u32 addr, const T data) {
if (config.is_texture_copy) {
TextureCopy(config);
LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> "
"0x%08X(%u+%u), flags 0x%08X",
LOG_TRACE(HW_GPU,
"TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> "
"0x%08X(%u+%u), flags 0x%08X",
config.texture_copy.size, config.GetPhysicalInputAddress(),
config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
config.texture_copy.output_gap * 16, config.flags);
} else {
DisplayTransfer(config);
LOG_TRACE(HW_GPU, "DisplayTransfer: 0x%08x(%ux%u)-> "
"0x%08x(%ux%u), dst format %x, flags 0x%08X",
LOG_TRACE(HW_GPU,
"DisplayTransfer: 0x%08x(%ux%u)-> "
"0x%08x(%ux%u), dst format %x, flags 0x%08X",
config.GetPhysicalInputAddress(), config.input_width.Value(),
config.input_height.Value(), config.GetPhysicalOutputAddress(),
config.output_width.Value(), config.output_height.Value(),
@ -570,4 +565,4 @@ void Shutdown() {
LOG_DEBUG(HW_GPU, "shutdown OK");
}
} // namespace
} // namespace GPU