Fermi2D: Rework blit engine and add a software blitter.
This commit is contained in:
parent
168c9ee341
commit
957840be91
12 changed files with 1431 additions and 18 deletions
|
@ -3,17 +3,25 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
MICROPROFILE_DECLARE(GPU_BlitEngine);
|
||||
MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128));
|
||||
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Fermi2D::Fermi2D() {
|
||||
using namespace Texture;
|
||||
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
|
@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
|
|||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
MICROPROFILE_SCOPE(GPU_BlitEngine);
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
regs.src.Address(), regs.dst.Address());
|
||||
|
||||
|
@ -52,9 +61,12 @@ void Fermi2D::Blit() {
|
|||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
constexpr s64 null_derivate = 1ULL << 32;
|
||||
Config config{
|
||||
.operation = regs.operation,
|
||||
.filter = args.sample_mode.filter,
|
||||
.must_accelerate = args.du_dx != null_derivate || args.dv_dy != null_derivate ||
|
||||
args.sample_mode.filter == Filter::Bilinear,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
|
@ -78,8 +90,9 @@ void Fermi2D::Blit() {
|
|||
config.src_x1 -= config.src_x0;
|
||||
config.src_x0 = 0;
|
||||
}
|
||||
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
UNIMPLEMENTED();
|
||||
sw_blitter->Blit(src, regs.dst, config);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
@ -21,6 +22,10 @@ class RasterizerInterface;
|
|||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
namespace Blitter {
|
||||
class SoftwareBlitEngine;
|
||||
}
|
||||
|
||||
/**
|
||||
* This Engine is known as G80_2D. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
|
||||
|
@ -32,7 +37,7 @@ namespace Tegra::Engines {
|
|||
|
||||
class Fermi2D final : public EngineInterface {
|
||||
public:
|
||||
explicit Fermi2D();
|
||||
explicit Fermi2D(MemoryManager& memory_manager_);
|
||||
~Fermi2D() override;
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
|
@ -286,6 +291,7 @@ public:
|
|||
struct Config {
|
||||
Operation operation;
|
||||
Filter filter;
|
||||
bool must_accelerate;
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_x1;
|
||||
|
@ -298,6 +304,7 @@ public:
|
|||
|
||||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
|
|
213
src/video_core/engines/sw_blitter/blitter.cpp
Normal file
213
src/video_core/engines/sw_blitter/blitter.cpp
Normal file
|
@ -0,0 +1,213 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
#include "video_core/engines/sw_blitter/converter.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
||||
namespace Tegra::Engines::Blitter {
|
||||
|
||||
using namespace Texture;
|
||||
|
||||
namespace {
|
||||
|
||||
void NeighrestNeighbor(std::span<u8> input, std::span<u8> output, u32 src_width, u32 src_height,
|
||||
u32 dst_width, u32 dst_height, size_t bpp) {
|
||||
const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
|
||||
const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
|
||||
size_t src_y = 0;
|
||||
for (u32 y = 0; y < dst_height; y++) {
|
||||
size_t src_x = 0;
|
||||
for (u32 x = 0; x < dst_width; x++) {
|
||||
const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp;
|
||||
const size_t write_to = (y * dst_width + x) * bpp;
|
||||
|
||||
std::memcpy(&output[write_to], &input[read_from], bpp);
|
||||
src_x += dx_du;
|
||||
}
|
||||
src_y += dy_dv;
|
||||
}
|
||||
}
|
||||
|
||||
void NeighrestNeighborFast(std::span<f32> input, std::span<f32> output, u32 src_width,
|
||||
u32 src_height, u32 dst_width, u32 dst_height) {
|
||||
const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
|
||||
const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
|
||||
size_t src_y = 0;
|
||||
for (u32 y = 0; y < dst_height; y++) {
|
||||
size_t src_x = 0;
|
||||
for (u32 x = 0; x < dst_width; x++) {
|
||||
const size_t read_from = ((src_y * src_width + src_x) >> 32) * 4;
|
||||
const size_t write_to = (y * dst_width + x) * 4;
|
||||
|
||||
std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * 4);
|
||||
src_x += dx_du;
|
||||
}
|
||||
src_y += dy_dv;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void Bilinear(std::span<f32> input, std::span<f32> output, size_t src_width,
|
||||
size_t src_height, size_t dst_width, size_t dst_height) {
|
||||
const auto inv_lerp = [](u32 coord, u32 end) { return
|
||||
static_cast<f32>(std::min(std::max(static_cast<s32>(coord), 0), end - 1)) / (end); };
|
||||
|
||||
|
||||
for (u32 y = 0; y < dst_height; y++) {
|
||||
const f32 ty_0 = inv_lerp(y, dst_extent_y);
|
||||
const f32 ty_1 = inv_lerp(y + 1, dst_extent_y);
|
||||
for (u32 x = 0; x < dst_width; x++) {
|
||||
const f32 tx_0 = inv_lerp(x, dst_extent_x);
|
||||
const f32 tx_1 = inv_lerp(x + 1, dst_extent_x);
|
||||
const std::array<f32, 4> get_pixel = [&](f32 tx, f32 ty, u32 width, u32 height) {
|
||||
std::array<f32, 4> result{};
|
||||
|
||||
return (std::llround(width * tx) + std::llround(height * ty) * width) * 4;
|
||||
};
|
||||
std::array<f32, 4> result{};
|
||||
|
||||
const size_t read_from = get_pixel(src_width, src_height);
|
||||
const size_t write_to = get_pixel(tx_0, ty_0, dst_width, dst_height);
|
||||
|
||||
std::memcpy(&output[write_to], &input[read_from], bpp);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
} // namespace
|
||||
|
||||
struct SoftwareBlitEngine::BlitEngineImpl {
|
||||
std::vector<u8> tmp_buffer;
|
||||
std::vector<u8> src_buffer;
|
||||
std::vector<u8> dst_buffer;
|
||||
std::vector<f32> intermediate_src;
|
||||
std::vector<f32> intermediate_dst;
|
||||
ConverterFactory converter_factory;
|
||||
};
|
||||
|
||||
SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
|
||||
: memory_manager{memory_manager_} {
|
||||
impl = std::make_unique<BlitEngineImpl>();
|
||||
}
|
||||
|
||||
SoftwareBlitEngine::~SoftwareBlitEngine() = default;
|
||||
|
||||
bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
|
||||
Fermi2D::Config& config) {
|
||||
UNIMPLEMENTED_IF(config.filter == Fermi2D::Filter::Bilinear);
|
||||
|
||||
const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
|
||||
if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
|
||||
return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
|
||||
surface.depth, surface.block_height, surface.block_depth);
|
||||
}
|
||||
return static_cast<size_t>(surface.pitch * surface.height);
|
||||
};
|
||||
const auto process_pitch_linear = [](bool unpack, std::span<u8> input, std::span<u8> output,
|
||||
u32 extent_x, u32 extent_y, u32 pitch, u32 x0, u32 y0,
|
||||
size_t bpp) {
|
||||
const size_t base_offset = x0 * bpp;
|
||||
const size_t copy_size = extent_x * bpp;
|
||||
for (u32 y = y0; y < extent_y; y++) {
|
||||
const size_t first_offset = y * pitch + base_offset;
|
||||
const size_t second_offset = y * extent_x * bpp;
|
||||
u8* write_to = unpack ? &output[first_offset] : &output[second_offset];
|
||||
const u8* read_from = unpack ? &input[second_offset] : &input[first_offset];
|
||||
std::memcpy(write_to, read_from, copy_size);
|
||||
}
|
||||
};
|
||||
|
||||
const u32 src_extent_x = config.src_x1 - config.src_x0;
|
||||
const u32 src_extent_y = config.src_y1 - config.src_y0;
|
||||
|
||||
const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
|
||||
const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
|
||||
const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
||||
const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
|
||||
const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
|
||||
impl->tmp_buffer.resize(src_size);
|
||||
memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
|
||||
|
||||
const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
|
||||
|
||||
const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
|
||||
|
||||
impl->src_buffer.resize(src_copy_size);
|
||||
|
||||
const bool no_passthrough =
|
||||
src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;
|
||||
|
||||
const auto convertion_phase_same_format = [&]() {
|
||||
NeighrestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
|
||||
dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
|
||||
};
|
||||
|
||||
const auto convertion_phase_ir = [&]() {
|
||||
auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
|
||||
impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * 4);
|
||||
impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * 4);
|
||||
input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);
|
||||
|
||||
NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
|
||||
src_extent_y, dst_extent_x, dst_extent_y);
|
||||
|
||||
auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
|
||||
output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
|
||||
};
|
||||
|
||||
// Do actuall Blit
|
||||
|
||||
impl->dst_buffer.resize(dst_copy_size);
|
||||
if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
|
||||
UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
|
||||
src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
|
||||
src_extent_y, src.block_height, src.block_depth,
|
||||
src_extent_x * src_bytes_per_pixel);
|
||||
} else {
|
||||
process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
|
||||
src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
|
||||
}
|
||||
|
||||
// Conversion Phase
|
||||
if (no_passthrough) {
|
||||
if (src.format != dst.format) {
|
||||
convertion_phase_ir();
|
||||
} else {
|
||||
convertion_phase_same_format();
|
||||
}
|
||||
} else {
|
||||
impl->dst_buffer.swap(impl->src_buffer);
|
||||
}
|
||||
|
||||
const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
|
||||
impl->tmp_buffer.resize(dst_size);
|
||||
memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
|
||||
|
||||
if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
|
||||
SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
|
||||
dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
|
||||
dst_extent_y, dst.block_height, dst.block_depth,
|
||||
dst_extent_x * dst_bytes_per_pixel);
|
||||
} else {
|
||||
process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
|
||||
dst.pitch, config.dst_x0, config.dst_y0,
|
||||
static_cast<size_t>(dst_bytes_per_pixel));
|
||||
}
|
||||
memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines::Blitter
|
27
src/video_core/engines/sw_blitter/blitter.h
Normal file
27
src/video_core/engines/sw_blitter/blitter.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines::Blitter {
|
||||
|
||||
class SoftwareBlitEngine {
|
||||
public:
|
||||
SoftwareBlitEngine(MemoryManager& memory_manager_);
|
||||
~SoftwareBlitEngine();
|
||||
|
||||
bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config);
|
||||
|
||||
private:
|
||||
MemoryManager& memory_manager;
|
||||
struct BlitEngineImpl;
|
||||
std::unique_ptr<BlitEngineImpl> impl;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines::Blitter
|
1097
src/video_core/engines/sw_blitter/converter.cpp
Normal file
1097
src/video_core/engines/sw_blitter/converter.cpp
Normal file
File diff suppressed because it is too large
Load diff
35
src/video_core/engines/sw_blitter/converter.h
Normal file
35
src/video_core/engines/sw_blitter/converter.h
Normal file
|
@ -0,0 +1,35 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Tegra::Engines::Blitter {
|
||||
|
||||
class Converter {
|
||||
public:
|
||||
virtual void ConvertTo(std::span<u8> input, std::span<f32> output) = 0;
|
||||
virtual void ConvertFrom(std::span<f32> input, std::span<u8> output) = 0;
|
||||
};
|
||||
|
||||
class ConverterFactory {
|
||||
public:
|
||||
ConverterFactory();
|
||||
~ConverterFactory();
|
||||
|
||||
Converter* GetFormatConverter(RenderTargetFormat format);
|
||||
|
||||
private:
|
||||
Converter* BuildConverter(RenderTargetFormat format);
|
||||
|
||||
struct ConverterFactoryImpl;
|
||||
std::unique_ptr<ConverterFactoryImpl> impl;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines::Blitter
|
Loading…
Add table
Add a link
Reference in a new issue