video_core: Track renderpass scopes properly

Author: IndecisiveTurtle · 2024-07-01 00:43:59 +03:00 · committed by TheTurtle
parent ad10020836
commit 22b930ba5e
36 changed files with 400 additions and 166 deletions
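The heart of the change: render pass begin/end moves out of Rasterizer::Draw and into the Scheduler, which caches the last RenderState and restarts the pass only when the requested state differs. A standalone model of that deduplication logic (simplified; the real RenderState holds Vulkan attachment infos and uses a memcmp-based comparison, see the vk_scheduler.h hunks below):

    #include <cstdio>

    struct RenderState {
        unsigned width = 0, height = 0;
        bool operator==(const RenderState&) const = default; // simplified comparison
    };

    class Scheduler {
    public:
        void BeginRendering(const RenderState& s) {
            if (is_rendering && s == state) {
                return; // same scope requested: elide the redundant end/begin pair
            }
            EndRendering();
            is_rendering = true;
            state = s;
            std::printf("vkCmdBeginRendering %ux%u\n", s.width, s.height);
        }
        void EndRendering() {
            if (!is_rendering) {
                return; // safe to call from transfer/dispatch paths at any time
            }
            is_rendering = false;
            std::printf("vkCmdEndRendering\n");
        }
    private:
        RenderState state{};
        bool is_rendering = false;
    };

    int main() {
        Scheduler scheduler;
        scheduler.BeginRendering({1920, 1080}); // opens the scope
        scheduler.BeginRendering({1920, 1080}); // deduplicated, no output
        scheduler.EndRendering();               // e.g. before a copy or dispatch
    }

Draws declare the attachment state they need via BeginRendering; anything that must execute outside a render pass (image uploads, compute dispatches, submission) calls EndRendering first, as the hunks below show.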

View file

@@ -187,6 +187,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         case PM4ItOpcode::ClearState: {
             break;
         }
+        case PM4ItOpcode::SetConfigReg: {
+            const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
+            const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
+            const auto* payload = reinterpret_cast<const u32*>(header + 2);
+            std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
+            break;
+        }
         case PM4ItOpcode::SetContextReg: {
             const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
             const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
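For context on the pointer arithmetic in the new SetConfigReg case: a PM4 set-data packet is one type-3 header dword, one register-offset dword, then the payload. A toy model (field names here are illustrative, not the real PM4CmdSetData layout):

    #include <cstdint>
    #include <cstring>

    struct Pm4SetData {
        uint32_t header;     // type-3 PM4 header; `count` is decoded from it
        uint32_t reg_offset; // offset relative to the register block base
        // payload dwords follow immediately
    };

    void WriteRegBlock(uint32_t* reg_array, uint32_t block_base,
                       const uint32_t* packet, uint32_t count) {
        const auto* set_data = reinterpret_cast<const Pm4SetData*>(packet);
        const uint32_t reg_addr = block_base + set_data->reg_offset;
        const uint32_t* payload = packet + 2;        // skip header + reg_offset
        std::memcpy(&reg_array[reg_addr], payload,
                    (count - 1) * sizeof(uint32_t)); // count includes the offset dword
    }

This is why the handler reads the payload at header + 2 and copies count - 1 dwords.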

View file

@@ -43,6 +43,7 @@ struct Liverpool {
     static constexpr u32 NumShaderUserData = 16;
     static constexpr u32 UconfigRegWordOffset = 0xC000;
     static constexpr u32 ContextRegWordOffset = 0xA000;
+    static constexpr u32 ConfigRegWordOffset = 0x2000;
     static constexpr u32 ShRegWordOffset = 0x2C00;
     static constexpr u32 NumRegs = 0xD000;
@@ -789,6 +790,7 @@ struct Liverpool {
         u32 raw;
         BitField<0, 1, u32> depth_clear_enable;
         BitField<1, 1, u32> stencil_clear_enable;
+        BitField<6, 1, u32> depth_compress_disable;
     };
     union AaConfig {

View file

@@ -366,6 +366,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
     if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eR8G8Unorm;
     }
+    if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
+        return vk::Format::eBc7UnormBlock;
+    }
     if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eBc2UnormBlock;
     }
@@ -376,9 +379,15 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
     if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eA2R10G10B10UnormPack32;
     }
+    if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && num_format == AmdGpu::NumberFormat::Snorm) {
+        return vk::Format::eA2R10G10B10SnormPack32;
+    }
     if (data_format == AmdGpu::DataFormat::Format10_11_11 && num_format == AmdGpu::NumberFormat::Float) {
         return vk::Format::eB10G11R11UfloatPack32;
     }
+    if (data_format == AmdGpu::DataFormat::Format16_16 && num_format == AmdGpu::NumberFormat::Float) {
+        return vk::Format::eR16G16Sfloat;
+    }
     UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
 }

View file

@@ -381,6 +381,7 @@ Frame* RendererVulkan::GetRenderFrame() {
     {
         std::unique_lock lock{free_mutex};
         free_cv.wait(lock, [this] { return !free_queue.empty(); });
+        LOG_INFO(Render_Vulkan, "Got render frame, remaining {}", free_queue.size() - 1);

         // Take the frame from the queue
         frame = free_queue.front();

View file

@@ -85,7 +85,7 @@ ComputePipeline::~ComputePipeline() = default;
 bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
-    boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
+    boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
     boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     u32 binding{};
@@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
         // need its full emulation anyways. For cases of metadata read a warning will be logged.
         if (buffer.is_storage) {
             if (texture_cache.TouchMeta(address, true)) {
-                LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+                LOG_WARNING(Render_Vulkan, "Metadata update skipped");
                 return false;
             }
         } else {
@@ -127,7 +127,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
     for (const auto& image : info.images) {
         const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
-        const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
+        const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth);
         image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
View file

@@ -187,7 +187,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
     const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
         .colorAttachmentCount = num_color_formats,
         .pColorAttachmentFormats = key.color_formats.data(),
-        .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
+        .depthAttachmentFormat = key.depth_format,
         .stencilAttachmentFormat = vk::Format::eUndefined,
     };
@@ -320,7 +320,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
     // Bind resource buffers and textures.
     boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
-    boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
+    boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     u32 binding{};
@@ -350,9 +350,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
         for (const auto& image : stage.images) {
             const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
-            const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
+            const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth);
             image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
-                                     vk::ImageLayout::eShaderReadOnlyOptimal);
+                                     (image.is_storage || image.is_depth) ? vk::ImageLayout::eGeneral
+                                                                          : vk::ImageLayout::eShaderReadOnlyOptimal);
             set_writes.push_back({
                 .dstSet = VK_NULL_HANDLE,
                 .dstBinding = binding++,
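The layout picked for each descriptor mirrors the transitions done by the texture cache later in this commit: storage and depth images are kept in eGeneral, plain sampled images in eShaderReadOnlyOptimal. The ternary above, restated as a helper (sketch only):

    #include <vulkan/vulkan.hpp>

    // Storage and depth images are transitioned to eGeneral by
    // TextureCache::FindImageView, so descriptors must advertise the same
    // layout; everything else is sampled in the read-only layout.
    vk::ImageLayout DescriptorLayout(bool is_storage, bool is_depth) {
        return (is_storage || is_depth) ? vk::ImageLayout::eGeneral
                                        : vk::ImageLayout::eShaderReadOnlyOptimal;
    }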

View file

@@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
             .logicOp = features.logicOp,
             .samplerAnisotropy = features.samplerAnisotropy,
             .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
+            .shaderImageGatherExtended = true,
             .shaderStorageImageMultisample = true,
             .shaderClipDistance = features.shaderClipDistance,
         },

View file

@@ -117,8 +117,8 @@ void PipelineCache::RefreshGraphicsKey() {
     key.num_samples = regs.aa_config.NumSamples();
     const auto& db = regs.depth_buffer;
+    key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
     if (key.depth.depth_enable) {
-        key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
         key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
     }
@@ -206,6 +206,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         block_pool.ReleaseContents();
         inst_pool.ReleaseContents();

+        if (hash == 0xa34c48f8) {
+            printf("bad\n");
+        }
+
         // Recompile shader to IR.
         try {
             LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
@@ -214,12 +218,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
             // Compile IR to SPIR-V
             auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
-            stages[i] = CompileSPV(spv_code, instance.GetDevice());
-            infos[i] = &programs[i].info;
             if (Config::dumpShaders()) {
                 DumpShader(spv_code, hash, stage, "spv");
             }
+            stages[i] = CompileSPV(spv_code, instance.GetDevice());
+            infos[i] = &programs[i].info;
         } catch (const Shader::Exception& e) {
             UNREACHABLE_MSG("{}", e.what());
         }
@@ -246,22 +249,25 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
     inst_pool.ReleaseContents();

     // Recompile shader to IR.
-    LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-    const Shader::Info info =
-        MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
-    auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
+    try {
+        LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
+        const Shader::Info info =
+            MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
+        auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

-    // Compile IR to SPIR-V
-    u32 binding{};
-    const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
-    const auto module = CompileSPV(spv_code, instance.GetDevice());
-    if (Config::dumpShaders()) {
-        DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
-    }
+        // Compile IR to SPIR-V
+        u32 binding{};
+        const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
+        if (Config::dumpShaders()) {
+            DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
+        }
+        const auto module = CompileSPV(spv_code, instance.GetDevice());

-    return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, &program.info,
-                                             module);
+        return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
+                                                 &program.info, module);
+    } catch (const Shader::Exception& e) {
+        UNREACHABLE_MSG("{}", e.what());
+        return nullptr;
+    }
 }

 void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,

View file

@@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
     : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
       liverpool{liverpool_}, memory{Core::Memory::Instance()},
       pipeline_cache{instance, scheduler, liverpool},
-      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} {
+      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
     if (!Config::nullGpu()) {
         liverpool->BindRasterizer(this);
     }
@@ -46,71 +46,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     pipeline->BindResources(memory, vertex_index_buffer, texture_cache);

-    boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
-        color_attachments{};
-    for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
-        const auto& col_buf = regs.color_buffers[col_buf_id];
-        if (!col_buf) {
-            continue;
-        }
-        const auto& hint = liverpool->last_cb_extent[col_buf_id];
-        const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
-        const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
-        color_attachments.push_back({
-            .imageView = *image_view.image_view,
-            .imageLayout = vk::ImageLayout::eGeneral,
-            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
-            .storeOp = vk::AttachmentStoreOp::eStore,
-            .clearValue =
-                is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
-        });
-        texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
-    }
-
-    vk::RenderingAttachmentInfo depth_attachment{};
-    u32 num_depth_attachments{};
-    if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) {
-        const auto htile_address = regs.depth_htile_data_base.GetAddress();
-        const bool is_clear = regs.depth_render_control.depth_clear_enable ||
-                              texture_cache.IsMetaCleared(htile_address);
-        const auto& image_view =
-            texture_cache.DepthTarget(regs.depth_buffer, htile_address, liverpool->last_db_extent);
-        depth_attachment = {
-            .imageView = *image_view.image_view,
-            .imageLayout = vk::ImageLayout::eGeneral,
-            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
-            .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
-            .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
-                                                          .stencil = regs.stencil_clear}},
-        };
-        texture_cache.TouchMeta(htile_address, false);
-        num_depth_attachments++;
-    }
-
-    // TODO: Don't restart renderpass every draw
-    const auto& scissor = regs.screen_scissor;
-    vk::RenderingInfo rendering_info = {
-        .renderArea =
-            {
-                .offset = {scissor.top_left_x, scissor.top_left_y},
-                .extent = {scissor.GetWidth(), scissor.GetHeight()},
-            },
-        .layerCount = 1,
-        .colorAttachmentCount = static_cast<u32>(color_attachments.size()),
-        .pColorAttachments = color_attachments.data(),
-        .pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr,
-    };
-    auto& area = rendering_info.renderArea.extent;
-    if (area.width == 2048) {
-        area.width = 1920;
-        area.height = 1080;
-    }
-
+    BeginRendering();
     UpdateDynamicState(*pipeline);

-    cmdbuf.beginRendering(rendering_info);
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
     if (is_indexed) {
         cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
@@ -120,7 +58,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
                                    : regs.num_indices;
         cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
     }
-    cmdbuf.endRendering();
 }

 void Rasterizer::DispatchDirect() {
@@ -138,15 +75,66 @@ void Rasterizer::DispatchDirect() {
         return;
     }

+    scheduler.EndRendering();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
     cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
 }

+void Rasterizer::BeginRendering() {
+    const auto& regs = liverpool->regs;
+    RenderState state;
+
+    for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
+        const auto& col_buf = regs.color_buffers[col_buf_id];
+        if (!col_buf) {
+            continue;
+        }
+        const auto& hint = liverpool->last_cb_extent[col_buf_id];
+        const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
+        state.width = std::min<u32>(state.width, hint.width);
+        state.height = std::min<u32>(state.height, hint.height);
+        const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
+        state.color_attachments[state.num_color_attachments++] = {
+            .imageView = *image_view.image_view,
+            .imageLayout = vk::ImageLayout::eGeneral,
+            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
+            .storeOp = vk::AttachmentStoreOp::eStore,
+            .clearValue =
+                is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
+        };
+        texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
+    }
+
+    if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invald &&
+        regs.depth_buffer.Address() != 0) {
+        const auto htile_address = regs.depth_htile_data_base.GetAddress();
+        const bool is_clear = regs.depth_render_control.depth_clear_enable ||
+                              texture_cache.IsMetaCleared(htile_address);
+        const auto& hint = liverpool->last_db_extent;
+        const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint);
+        state.width = std::min<u32>(state.width, hint.width);
+        state.height = std::min<u32>(state.height, hint.height);
+        state.depth_attachment = {
+            .imageView = *image_view.image_view,
+            .imageLayout = vk::ImageLayout::eGeneral,
+            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
+            .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
+            .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
+                                                          .stencil = regs.stencil_clear}},
+        };
+        texture_cache.TouchMeta(htile_address, false);
+        state.num_depth_attachments++;
+    }
+    scheduler.BeginRendering(state);
+}
+
 u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
     // Emulate QuadList primitive type with CPU made index buffer.
     const auto& regs = liverpool->regs;
     if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
-        ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
+        //ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
         is_indexed = true;

         // Emit indices.
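Since Vulkan has no quad primitive, the emulation builds an index buffer that splits each quad into two triangles. One common expansion, shown as a sketch (the emitter in this function may order vertices differently):

    #include <cstdint>
    #include <vector>

    // Expand n quads (4 vertices each) into 2 triangles per quad:
    // {0,1,2} and {0,2,3}, offset by 4 for every subsequent quad.
    std::vector<uint16_t> MakeQuadListIndices(uint32_t num_quads) {
        std::vector<uint16_t> indices;
        indices.reserve(num_quads * 6);
        for (uint32_t q = 0; q < num_quads; ++q) {
            const auto base = static_cast<uint16_t>(q * 4);
            for (uint16_t off : {0, 1, 2, 0, 2, 3}) {
                indices.push_back(static_cast<uint16_t>(base + off));
            }
        }
        return indices;
    }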

View file

@@ -37,6 +37,8 @@ private:
     u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
     void MapMemory(VAddr addr, size_t size);

+    void BeginRendering();
+
     void UpdateDynamicState(const GraphicsPipeline& pipeline);
     void UpdateViewportScissorState();
     void UpdateDepthStencilState();

View file

@@ -18,6 +18,37 @@ Scheduler::~Scheduler() {
     std::free(profiler_scope);
 }

+void Scheduler::BeginRendering(const RenderState& new_state) {
+    if (is_rendering && render_state == new_state) {
+        return;
+    }
+    EndRendering();
+    is_rendering = true;
+    render_state = new_state;
+
+    const vk::RenderingInfo rendering_info = {
+        .renderArea =
+            {
+                .offset = {0, 0},
+                .extent = {render_state.width, render_state.height},
+            },
+        .layerCount = 1,
+        .colorAttachmentCount = static_cast<u32>(render_state.color_attachments.size()),
+        .pColorAttachments = render_state.color_attachments.data(),
+        .pDepthAttachment =
+            render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr,
+    };
+    current_cmdbuf.beginRendering(rendering_info);
+}
+
+void Scheduler::EndRendering() {
+    if (!is_rendering) {
+        return;
+    }
+    is_rendering = false;
+    current_cmdbuf.endRendering();
+}
+
 void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
     // When flushing, we only send data to the worker thread; no waiting is necessary.
     SubmitExecution(signal, wait);
@@ -55,6 +86,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
 }

 void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
+    std::scoped_lock lk{submit_mutex};
     const u64 signal_value = master_semaphore.NextTick();

     auto* profiler_ctx = instance.GetProfilerContext();
@@ -63,7 +95,7 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
         TracyVkCollect(profiler_ctx, current_cmdbuf);
     }

-    std::scoped_lock lk{submit_mutex};
+    EndRendering();
     master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
     master_semaphore.Refresh();
     AllocateWorkerCommandBuffers();

View file

@@ -4,6 +4,7 @@
 #pragma once

 #include <condition_variable>
+#include <boost/container/static_vector.hpp>
 #include "common/types.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
@@ -12,6 +13,19 @@ namespace Vulkan {

 class Instance;

+struct RenderState {
+    std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
+    vk::RenderingAttachmentInfo depth_attachment{};
+    u32 num_color_attachments{};
+    u32 num_depth_attachments{};
+    u32 width = std::numeric_limits<u32>::max();
+    u32 height = std::numeric_limits<u32>::max();
+
+    bool operator==(const RenderState& other) const noexcept {
+        return std::memcmp(this, &other, sizeof(RenderState)) == 0;
+    }
+};
+
 class Scheduler {
 public:
     explicit Scheduler(const Instance& instance);
@@ -26,6 +40,12 @@ public:
     /// Waits for the given tick to trigger on the GPU.
     void Wait(u64 tick);

+    /// Starts a new rendering scope with provided state.
+    void BeginRendering(const RenderState& new_state);
+
+    /// Ends current rendering scope.
+    void EndRendering();
+
     /// Returns the current command buffer.
     vk::CommandBuffer CommandBuffer() const {
         return current_cmdbuf;
@@ -59,6 +79,8 @@ private:
     CommandPool command_pool;
     vk::CommandBuffer current_cmdbuf;
     std::condition_variable_any event_cv;
+    RenderState render_state;
+    bool is_rendering = false;
     tracy::VkCtxScope* profiler_scope{};
 };
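One subtlety worth noting about the memcmp-based operator==: it compares padding bytes along with the members, so it only behaves reliably when every instance starts out fully zeroed. A field-wise comparison avoids depending on the struct's own padding; a possible alternative (sketch):

    #include <cstring>

    // Compare the counted members directly; the attachment structs are
    // still compared bytewise, but RenderState's own padding no longer
    // influences the result.
    bool Equal(const RenderState& a, const RenderState& b) {
        return a.num_color_attachments == b.num_color_attachments &&
               a.num_depth_attachments == b.num_depth_attachments &&
               a.width == b.width && a.height == b.height &&
               std::memcmp(a.color_attachments.data(), b.color_attachments.data(),
                           sizeof(a.color_attachments)) == 0 &&
               std::memcmp(&a.depth_attachment, &b.depth_attachment,
                           sizeof(a.depth_attachment)) == 0;
    }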

View file

@@ -221,6 +221,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     : instance{&instance_}, scheduler{&scheduler_}, info{info_},
       image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
       cpu_addr_end{cpu_addr + info.guest_size_bytes} {
+    ASSERT(info.pixel_format != vk::Format::eUndefined);
     vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
                                vk::ImageCreateFlagBits::eExtendedUsage};
     if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
@@ -272,7 +273,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
 }

-void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask) {
+void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
+                    vk::CommandBuffer cmdbuf) {
     if (dst_layout == layout && dst_mask == access_mask) {
         return;
     }
@@ -300,7 +302,12 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
                          dst_mask == vk::AccessFlagBits::eTransferWrite)
                             ? vk::PipelineStageFlagBits::eTransfer
                             : vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;

-    const auto cmdbuf = scheduler->CommandBuffer();
+    if (!cmdbuf) {
+        // When using external cmdbuf you are responsible for ending rp.
+        scheduler->EndRendering();
+        cmdbuf = scheduler->CommandBuffer();
+    }
     cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
                            barrier);
@@ -310,6 +317,7 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
 }

 void Image::Upload(vk::Buffer buffer, u64 offset) {
+    scheduler->EndRendering();
     Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);

     // Copy to the image.
@@ -318,7 +326,7 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
         .bufferRowLength = info.pitch,
         .bufferImageHeight = info.size.height,
         .imageSubresource{
-            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .aspectMask = aspect_mask,
             .mipLevel = 0,
             .baseArrayLayer = 0,
             .layerCount = 1,
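The new optional cmdbuf parameter on Transit covers callers that record into their own command buffer (the detilers in the tile manager, for instance). Two call shapes, as a hypothetical usage sketch (external_cmdbuf is an assumed caller-owned buffer):

    // Default: use the scheduler's command buffer; any open render pass
    // is ended first, as the hunk above shows.
    image.Transit(vk::ImageLayout::eTransferDstOptimal,
                  vk::AccessFlagBits::eTransferWrite);

    // External: record the barrier into a caller-provided command buffer.
    // The caller must have closed the rendering scope already.
    image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderRead,
                  external_cmdbuf);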

View file

@@ -132,7 +132,8 @@ struct Image {
         return image_view_ids[std::distance(image_view_infos.begin(), it)];
     }

-    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask);
+    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
+                 vk::CommandBuffer cmdbuf = {});
     void Upload(vk::Buffer buffer, u64 offset);

     const Vulkan::Instance* instance;

View file

@@ -80,8 +80,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
     // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
     vk::Format format = info.format;
+    vk::ImageAspectFlags aspect = image.aspect_mask;
     if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && format == vk::Format::eR32Sfloat) {
-        format = vk::Format::eD32Sfloat;
+        format = image.info.pixel_format;
+        aspect = vk::ImageAspectFlagBits::eDepth;
     }

     const vk::ImageViewCreateInfo image_view_ci = {
@@ -91,7 +93,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
         .format = format,
         .components = info.mapping,
         .subresourceRange{
-            .aspectMask = image.aspect_mask,
+            .aspectMask = aspect,
             .baseMipLevel = 0U,
             .levelCount = 1,
             .baseArrayLayer = 0,

View file

@@ -116,10 +116,15 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re
     std::unique_lock lock{m_page_table};
     boost::container::small_vector<ImageId, 2> image_ids;
     ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
-        if (image.cpu_addr == cpu_address && image.info.size.width == info.size.width &&
-            image.info.IsDepthStencil() == info.IsDepthStencil()) {
-            image_ids.push_back(image_id);
-        }
+        // Address and width must match.
+        if (image.cpu_addr != cpu_address || image.info.size.width != info.size.width) {
+            return;
+        }
+        if (info.IsDepthStencil() != image.info.IsDepthStencil() &&
+            info.pixel_format != vk::Format::eR32Sfloat) {
+            return;
+        }
+        image_ids.push_back(image_id);
     });

     ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
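The rewritten match rule tolerates exactly one kind of mismatch: a depth-stencil image looked up through an R32 Float descriptor (shaders sample D32 as R32, see the image_view.cpp hunk above). Restated as a predicate (sketch using the codebase's types):

    // Accept a cached image when address and width match, and any
    // depth-vs-color mismatch is explained by the R32 Float aliasing case.
    bool Matches(const ImageInfo& info, VAddr cpu_address, const Image& image) {
        if (image.cpu_addr != cpu_address || image.info.size.width != info.size.width) {
            return false;
        }
        const bool ds_mismatch = info.IsDepthStencil() != image.info.IsDepthStencil();
        return !ds_mismatch || info.pixel_format == vk::Format::eR32Sfloat;
    }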
@@ -129,7 +134,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re
         image_id = slot_images.insert(instance, scheduler, info, cpu_address);
         RegisterImage(image_id);
     } else {
-        image_id = image_ids[0];
+        image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
     }

     RegisterMeta(info, image_id);
@@ -163,11 +168,11 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
     return slot_image_views[view_id];
 }

-ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
+ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage, bool is_depth) {
     const ImageInfo info{desc};
     Image& image = FindImage(info, desc.Address());

-    if (is_storage) {
+    if (is_storage || is_depth) {
         image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
         image.info.usage.storage = true;
     } else {
@@ -202,7 +207,7 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe
     auto& image = FindImage(info, buffer.Address(), false);
     image.flags &= ~ImageFlagBits::CpuModified;

-    image.Transit(vk::ImageLayout::eDepthStencilAttachmentOptimal,
+    image.Transit(vk::ImageLayout::eGeneral,
                   vk::AccessFlagBits::eDepthStencilAttachmentWrite |
                       vk::AccessFlagBits::eDepthStencilAttachmentRead);
@@ -261,6 +266,8 @@ void TextureCache::RefreshImage(Image& image) {
         .imageExtent = {width, height, 1},
     };

+    scheduler.EndRendering();
+
     const auto cmdbuf = scheduler.CommandBuffer();
     image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);

View file

@@ -52,7 +52,8 @@ public:
                      bool refresh_on_create = true);

     /// Retrieves an image view with the properties of the specified image descriptor.
-    [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
+    [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage,
+                                           bool is_depth);

     /// Retrieves the render target with specified properties
     [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,

View file

@@ -231,7 +231,7 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT
 TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
     : instance{instance}, scheduler{scheduler},
-      staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
+      staging{instance, scheduler, StagingFlags, 128_MB, Vulkan::BufferType::Upload} {

     static const std::array detiler_shaders{
         HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,