video_core: Various small improvements and bug fixes (#2525)

* ir_passes: Add barrier at end of block too

* vk_platform: Always assign names to resources

* texture_cache: Better overlap handling

* liverpool: Avoid resuming ce_task when it's finished

* spirv_quad_rect: Skip default attributes

Fixes some crashes

* memory: Improve buffer size clamping

* liverpool: Relax binary header validity check

* liverpool: Stub SetPredication with a warning

* Better than an outright crash

* emit_spirv: Implement round to zero mode

* liverpool: queue::pop takes the front element

* image_info: Remove obsolete assert

The old code assumed the mip only had one layer, so a right overlap could never resolve to mip 0. The new path handles images that are both mip-mapped and multi-layered, so now it can.
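
To make the scenario concrete, here is a minimal standalone illustration (made-up addresses and sizes, not emulator code) of how a right overlap can now land on layer 1 of mip 0:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical 2-layer image whose mip 0 spans both layers. An overlap that
// starts past the image base can still resolve to mip 0 -- at the start of
// layer 1 -- which the removed ASSERT(mip != 0) would have rejected.
int main() {
    const uint64_t guest_address = 0x100000; // image base (made up)
    const uint64_t mip0_size = 0x80000;      // covers all layers of mip 0
    const uint32_t layers = 2;
    const uint64_t slice_size = mip0_size / layers;

    const uint64_t overlap_addr = guest_address + slice_size; // layer 1 of mip 0
    assert(overlap_addr > guest_address);                     // a "right" overlap...
    assert((overlap_addr - guest_address) % slice_size == 0); // ...yet slice-aligned in mip 0
    return 0;
}
```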

* tile_manager: Fix size calculation

* spirv_quad_rect: Skip default attributes

---------

Co-authored-by: poly <47796739+polybiusproxy@users.noreply.github.com>
Co-authored-by: squidbus <175574877+squidbus@users.noreply.github.com>
TheTurtle 2025-02-24 14:31:12 +02:00 committed by GitHub
parent 0885d8fce7
commit 76b4da6212
17 changed files with 112 additions and 88 deletions

View file

@@ -137,9 +137,6 @@ s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle,
 s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds,
                                    uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) {
-    LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle,
-              *chunkIds, numberOfEntries);
     if (handle != PlaygoHandle) {
         return ORBIS_PLAYGO_ERROR_BAD_HANDLE;
     }
@@ -149,6 +146,10 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh
     if (numberOfEntries == 0) {
         return ORBIS_PLAYGO_ERROR_BAD_SIZE;
     }
+
+    LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle,
+              *chunkIds, numberOfEntries);
+
     if (!playgo) {
         return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED;
     }

View file

@@ -57,15 +57,25 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1
 }

 u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
-    static constexpr u64 MinSizeToClamp = 1_GB;
+    static constexpr u64 MinSizeToClamp = 512_MB;
     // Dont bother with clamping if the size is small so we dont pay a map lookup on every buffer.
     if (size < MinSizeToClamp) {
         return size;
     }
-    const auto vma = FindVMA(virtual_addr);
+
+    // Clamp size to the remaining size of the current VMA.
+    auto vma = FindVMA(virtual_addr);
     ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr);
-    const u64 clamped_size =
-        std::min<u64>(size, vma->second.base + vma->second.size - virtual_addr);
+    u64 clamped_size = vma->second.base + vma->second.size - virtual_addr;
+    ++vma;
+
+    // Keep adding to the size while there is contiguous virtual address space.
+    while (!vma->second.IsFree() && clamped_size < size) {
+        clamped_size += vma->second.size;
+        ++vma;
+    }
+    clamped_size = std::min(clamped_size, size);
+
     if (size != clamped_size) {
         LOG_WARNING(Kernel_Vmm, "Clamped requested buffer range addr={:#x}, size={:#x} to {:#x}",
                     virtual_addr, size, clamped_size);
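
The accumulation loop above can be modelled in isolation. A small self-contained sketch (toy `Vma` type and map; names are illustrative, not the emulator's real types):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>

struct Vma {
    uint64_t base;
    uint64_t size;
    bool free;
    bool IsFree() const { return free; }
};

// Clamp a requested range to the contiguous run of mapped VMAs containing addr.
uint64_t ClampRangeSize(const std::map<uint64_t, Vma>& vmas, uint64_t addr, uint64_t size) {
    auto it = std::prev(vmas.upper_bound(addr)); // VMA containing addr
    uint64_t clamped = it->second.base + it->second.size - addr;
    ++it;
    // Keep extending through adjacent mapped regions until the request is covered.
    while (it != vmas.end() && !it->second.IsFree() && clamped < size) {
        clamped += it->second.size;
        ++it;
    }
    return std::min(clamped, size);
}

int main() {
    const std::map<uint64_t, Vma> vmas{
        {0x0000, {0x0000, 0x1000, false}},
        {0x1000, {0x1000, 0x1000, false}}, // contiguous mapping
        {0x2000, {0x2000, 0x1000, true}},  // hole: clamping stops here
    };
    assert(ClampRangeSize(vmas, 0x0800, 0x4000) == 0x1800); // clamped at the hole
    assert(ClampRangeSize(vmas, 0x0800, 0x0400) == 0x0400); // already fits
    return 0;
}
```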

View file

@@ -369,7 +369,12 @@ void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo&
         LOG_WARNING(Render_Vulkan, "Unknown FP denorm mode {}", u32(fp_denorm_mode));
     }
     const auto fp_round_mode = runtime_info.fp_round_mode32;
-    if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) {
+    if (fp_round_mode == AmdGpu::FpRoundMode::ToZero) {
+        if (profile.support_fp32_round_to_zero) {
+            ctx.AddCapability(spv::Capability::RoundingModeRTZ);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTZ, 32U);
+        }
+    } else if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) {
         LOG_WARNING(Render_Vulkan, "Unknown FP rounding mode {}", u32(fp_round_mode));
     }
 }
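
For intuition, round-toward-zero (RTZ) differs from the default round-to-nearest-even in the last ULP of inexact fp32 results. A host-side illustration using `<cfenv>` (not emulator code; exact behavior depends on the compiler's floating-point settings):

```cpp
#include <cfenv>
#include <cstdio>

// Divide 1/3 in fp32 under both rounding modes; the quotient is inexact, so
// the two modes produce adjacent representable values.
int main() {
    volatile float a = 1.0f, b = 3.0f; // volatile blocks constant folding

    std::fesetround(FE_TONEAREST);
    const volatile float nearest = a / b; // 0.333333343 (rounded up)

    std::fesetround(FE_TOWARDZERO);
    const volatile float to_zero = a / b; // 0.333333313 (truncated)

    std::printf("nearest=%.9g to_zero=%.9g\n", nearest, to_zero);
    return 0;
}
```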

View file

@@ -13,8 +13,7 @@ constexpr u32 SPIRV_VERSION_1_5 = 0x00010500;

 struct QuadRectListEmitter : public Sirit::Module {
     explicit QuadRectListEmitter(const FragmentRuntimeInfo& fs_info_)
-        : Sirit::Module{SPIRV_VERSION_1_5}, fs_info{fs_info_}, inputs{fs_info_.num_inputs},
-          outputs{fs_info_.num_inputs} {
+        : Sirit::Module{SPIRV_VERSION_1_5}, fs_info{fs_info_} {
         void_id = TypeVoid();
         bool_id = TypeBool();
         float_id = TypeFloat(32);
@@ -253,15 +252,16 @@ private:
         } else {
             gl_per_vertex = AddOutput(gl_per_vertex_type);
         }
+        outputs.reserve(fs_info.num_inputs);
         for (int i = 0; i < fs_info.num_inputs; i++) {
             const auto& input = fs_info.inputs[i];
             if (input.IsDefault()) {
                 continue;
             }
-            outputs[i] = AddOutput(model == spv::ExecutionModel::TessellationControl
-                                       ? TypeArray(vec4_id, Int(4))
-                                       : vec4_id);
-            Decorate(outputs[i], spv::Decoration::Location, input.param_index);
+            outputs.emplace_back(AddOutput(model == spv::ExecutionModel::TessellationControl
+                                               ? TypeArray(vec4_id, Int(4))
+                                               : vec4_id));
+            Decorate(outputs.back(), spv::Decoration::Location, input.param_index);
         }
     }
@@ -276,13 +276,14 @@ private:
         const Id gl_per_vertex_array{TypeArray(gl_per_vertex_type, Constant(uint_id, 32U))};
         gl_in = AddInput(gl_per_vertex_array);
         const Id float_arr{TypeArray(vec4_id, Int(32))};
+        inputs.reserve(fs_info.num_inputs);
         for (int i = 0; i < fs_info.num_inputs; i++) {
             const auto& input = fs_info.inputs[i];
             if (input.IsDefault()) {
                 continue;
             }
-            inputs[i] = AddInput(float_arr);
-            Decorate(inputs[i], spv::Decoration::Location, input.param_index);
+            inputs.emplace_back(AddInput(float_arr));
+            Decorate(inputs.back(), spv::Decoration::Location, input.param_index);
         }
     }
@@ -334,4 +335,4 @@ std::vector<u32> EmitAuxilaryTessShader(AuxShaderType type, const FragmentRuntim
     return ctx.Assemble();
 }

 } // namespace Shader::Backend::SPIRV

View file

@@ -43,6 +43,10 @@ static void EmitBarrierInBlock(IR::Block* block) {
             action = BarrierAction::BarrierOnRead;
         }
     }
+    if (action != BarrierAction::None) {
+        IR::IREmitter ir{*block, --block->end()};
+        ir.Barrier();
+    }
 }

 // Inserts a barrier after divergent conditional blocks to avoid undefined

View file

@@ -21,6 +21,7 @@ struct Profile {
     bool support_separate_rounding_mode{};
     bool support_fp32_denorm_preserve{};
     bool support_fp32_denorm_flush{};
+    bool support_fp32_round_to_zero{};
     bool support_explicit_workgroup_layout{};
     bool support_legacy_vertex_attributes{};
     bool supports_image_load_store_lod{};

View file

@@ -93,17 +93,14 @@ void Liverpool::Process(std::stop_token stoken) {
             // Process incoming commands with high priority
             while (num_commands) {
                 Common::UniqueFunction<void> callback{};
                 {
                     std::unique_lock lk{submit_mutex};
-                    callback = std::move(command_queue.back());
+                    callback = std::move(command_queue.front());
                     command_queue.pop();
+                    --num_commands;
                 }
                 callback();
-                --num_commands;
             }

             curr_qid = (curr_qid + 1) % num_mapped_queues;
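
The fix matters because `std::queue::pop()` always removes the front element; moving from `back()` first would execute the newest submission while silently discarding the oldest. A standalone sketch of the two patterns:

```cpp
#include <cassert>
#include <queue>
#include <utility>

int main() {
    std::queue<int> commands;
    commands.push(1); // oldest submission
    commands.push(2); // newest submission

    // Buggy pattern: grab back(), but pop() still discards the FRONT.
    //   int cmd = std::move(commands.back()); commands.pop(); // runs 2, drops 1
    // Fixed pattern: grab and pop the same (oldest) element.
    int cmd = std::move(commands.front());
    commands.pop();
    assert(cmd == 1); // commands now execute in FIFO order
    return 0;
}
```
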
@@ -395,6 +392,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                          header + 2, (count - 1) * sizeof(u32));
                 break;
             }
+            case PM4ItOpcode::SetPredication: {
+                LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION");
+                break;
+            }
             case PM4ItOpcode::IndexType: {
                 const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
                 regs.index_buffer_type.raw = index_type->raw;
@@ -670,6 +671,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                 if (vo_port->IsVoLabel(wait_addr) &&
                     num_submits == mapped_queues[GfxQueueId].submits.size()) {
                     vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
+                    break;
                 }
                 while (!wait_reg_mem->Test()) {
                     YIELD_GFX();
@@ -693,7 +695,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                 break;
             }
             case PM4ItOpcode::WaitOnCeCounter: {
-                while (cblock.ce_count <= cblock.de_count) {
+                while (cblock.ce_count <= cblock.de_count && !ce_task.handle.done()) {
                     RESUME_GFX(ce_task);
                 }
                 break;
@@ -714,7 +716,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
     }

     if (ce_task.handle) {
-        ASSERT_MSG(ce_task.handle.done(), "Partially processed CCB");
+        while (!ce_task.handle.done()) {
+            RESUME_GFX(ce_task);
+        }
         ce_task.handle.destroy();
     }
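
The drain loop above follows the usual C++20 coroutine pattern: resume the handle until `done()` reports completion, then destroy it. A minimal self-contained sketch with a stand-in task type (not Liverpool's actual coroutine machinery):

```cpp
#include <coroutine>
#include <cstdio>

// Toy coroutine task; suspends at start and at every co_await point.
struct Task {
    struct promise_type {
        Task get_return_object() {
            return {std::coroutine_handle<promise_type>::from_promise(*this)};
        }
        std::suspend_always initial_suspend() noexcept { return {}; }
        std::suspend_always final_suspend() noexcept { return {}; }
        void return_void() {}
        void unhandled_exception() {}
    };
    std::coroutine_handle<promise_type> handle;
};

Task ProcessCcb() {
    std::puts("ccb: first batch");
    co_await std::suspend_always{};
    std::puts("ccb: remaining work"); // left over after the last explicit resume
}

int main() {
    Task task = ProcessCcb();
    task.handle.resume();         // partial processing, as WaitOnCeCounter might do
    while (!task.handle.done()) { // drain instead of asserting completion
        task.handle.resume();
    }
    task.handle.destroy();
    return 0;
}
```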

View file

@@ -83,8 +83,7 @@ struct Liverpool {
         u32 crc32;

         bool Valid() const {
-            return shader_hash && crc32 &&
-                   (std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0);
+            return std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0;
         }
     };

View file

@@ -610,9 +610,10 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     Image& image = texture_cache.GetImage(image_id);
     // Only perform sync if image is:
     // - GPU modified; otherwise there are no changes to synchronize.
-    // - Not CPU modified; otherwise we could overwrite CPU changes with stale GPU changes.
+    // - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
+    // - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
     if (False(image.flags & ImageFlagBits::GpuModified) ||
-        True(image.flags & ImageFlagBits::CpuDirty)) {
+        True(image.flags & ImageFlagBits::Dirty)) {
         return false;
     }
     ASSERT_MSG(device_addr == image.info.guest_address,
@@ -628,8 +629,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
         const u32 depth =
             image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
         const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
-        offset += mip_ofs * num_layers;
-        if (offset + (mip_size * num_layers) > max_offset) {
+        offset += mip_ofs;
+        if (offset + mip_size > max_offset) {
             break;
         }
         copies.push_back({

View file

@@ -196,6 +196,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .subgroup_size = instance.SubgroupSize(),
         .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
         .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
+        .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
         .support_explicit_workgroup_layout = true,
         .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
         .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),

View file

@@ -3,11 +3,8 @@

 #pragma once

-#include <memory>
-#include <variant>
 #include <fmt/format.h>
-#include "common/config.h"
 #include "common/logging/log.h"
 #include "common/types.h"
 #include "video_core/renderer_vulkan/vk_common.h"
@@ -33,9 +30,6 @@ concept VulkanHandleType = vk::isVulkanHandleType<T>::value;

 template <VulkanHandleType HandleType>
 void SetObjectName(vk::Device device, const HandleType& handle, std::string_view debug_name) {
-    if (!Config::getVkHostMarkersEnabled()) {
-        return;
-    }
     const vk::DebugUtilsObjectNameInfoEXT name_info = {
         .objectType = HandleType::objectType,
         .objectHandle = reinterpret_cast<u64>(static_cast<typename HandleType::NativeType>(handle)),
@@ -50,9 +44,6 @@ void SetObjectName(vk::Device device, const HandleType& handle, std::string_view
 template <VulkanHandleType HandleType, typename... Args>
 void SetObjectName(vk::Device device, const HandleType& handle, const char* format,
                    const Args&... args) {
-    if (!Config::getVkHostMarkersEnabled()) {
-        return;
-    }
     const std::string debug_name = fmt::vformat(format, fmt::make_format_args(args...));
     SetObjectName(device, handle, debug_name);
 }

View file

@@ -394,7 +394,7 @@ void Image::CopyImage(const Image& image) {
             vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }

-void Image::CopyMip(const Image& image, u32 mip) {
+void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
     scheduler->EndRendering();

     Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
@@ -407,18 +407,19 @@ void Image::CopyMip(const Image& image, u32 mip) {
     ASSERT(mip_w == image.info.size.width);
     ASSERT(mip_h == image.info.size.height);

+    const u32 num_layers = std::min(image.info.resources.layers, info.resources.layers);
     const vk::ImageCopy image_copy{
         .srcSubresource{
             .aspectMask = image.aspect_mask,
             .mipLevel = 0,
             .baseArrayLayer = 0,
-            .layerCount = image.info.resources.layers,
+            .layerCount = num_layers,
         },
         .dstSubresource{
             .aspectMask = image.aspect_mask,
             .mipLevel = mip,
-            .baseArrayLayer = 0,
-            .layerCount = info.resources.layers,
+            .baseArrayLayer = slice,
+            .layerCount = num_layers,
         },
         .extent = {mip_w, mip_h, mip_d},
     };

View file

@@ -104,7 +104,7 @@ struct Image {
     void Upload(vk::Buffer buffer, u64 offset);

     void CopyImage(const Image& image);
-    void CopyMip(const Image& image, u32 mip);
+    void CopyMip(const Image& src_image, u32 mip, u32 slice);

     bool IsTracked() {
         return track_addr != 0 && track_addr_end != 0;

View file

@@ -208,15 +208,14 @@ void ImageInfo::UpdateSize() {
                 mip_info.pitch = std::max(mip_info.pitch * 4, 32u);
                 mip_info.height = std::max(mip_info.height * 4, 32u);
             }
-            mip_info.size *= mip_d;
+            mip_info.size *= mip_d * resources.layers;
             mip_info.offset = guest_size;
             mips_layout.emplace_back(mip_info);
             guest_size += mip_info.size;
         }
-        guest_size *= resources.layers;
     }

-int ImageInfo::IsMipOf(const ImageInfo& info) const {
+s32 ImageInfo::MipOf(const ImageInfo& info) const {
     if (!IsCompatible(info)) {
         return -1;
     }
@@ -237,7 +236,12 @@ int ImageInfo::IsMipOf(const ImageInfo& info) const {
     // Find mip
     auto mip = -1;
     for (auto m = 0; m < info.mips_layout.size(); ++m) {
-        if (guest_address == (info.guest_address + info.mips_layout[m].offset)) {
+        const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = info.mips_layout[m];
+        const VAddr mip_base = info.guest_address + mip_ofs;
+        const VAddr mip_end = mip_base + mip_size;
+        const u32 slice_size = mip_size / info.resources.layers;
+        if (guest_address >= mip_base && guest_address < mip_end &&
+            (guest_address - mip_base) % slice_size == 0) {
             mip = m;
             break;
         }
@@ -246,7 +250,6 @@ int ImageInfo::IsMipOf(const ImageInfo& info) const {
     if (mip < 0) {
         return -1;
     }
-    ASSERT(mip != 0);

     const auto mip_w = std::max(info.size.width >> mip, 1u);
     const auto mip_h = std::max(info.size.height >> mip, 1u);
@@ -269,7 +272,7 @@ int ImageInfo::IsMipOf(const ImageInfo& info) const {
     return mip;
 }

-int ImageInfo::IsSliceOf(const ImageInfo& info) const {
+s32 ImageInfo::SliceOf(const ImageInfo& info, s32 mip) const {
     if (!IsCompatible(info)) {
         return -1;
     }
@@ -285,13 +288,13 @@ int ImageInfo::IsSliceOf(const ImageInfo& info) const {
     }

     // Check for size alignment.
-    const bool slice_size = info.guest_size / info.resources.layers;
+    const u32 slice_size = info.mips_layout[mip].size / info.resources.layers;
     if (guest_size % slice_size != 0) {
         return -1;
     }

     // Ensure that address is aligned too.
-    const auto addr_diff = guest_address - info.guest_address;
+    const auto addr_diff = guest_address - (info.guest_address + info.mips_layout[mip].offset);
     if ((addr_diff % guest_size) != 0) {
         return -1;
     }

View file

@@ -30,8 +30,8 @@ struct ImageInfo {
     bool IsDepthStencil() const;
     bool HasStencil() const;

-    int IsMipOf(const ImageInfo& info) const;
-    int IsSliceOf(const ImageInfo& info) const;
+    s32 MipOf(const ImageInfo& info) const;
+    s32 SliceOf(const ImageInfo& info, s32 mip) const;

     /// Verifies if images are compatible for subresource merging.
     bool IsCompatible(const ImageInfo& info) const {

View file

@@ -223,16 +223,13 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
     // Right overlap, the image requested is a possible subresource of the image from cache.
     if (image_info.guest_address > tex_cache_image.info.guest_address) {
-        if (auto mip = image_info.IsMipOf(tex_cache_image.info); mip >= 0) {
-            return {cache_image_id, mip, -1};
-        }
-
-        if (auto slice = image_info.IsSliceOf(tex_cache_image.info); slice >= 0) {
-            return {cache_image_id, -1, slice};
+        if (auto mip = image_info.MipOf(tex_cache_image.info); mip >= 0) {
+            if (auto slice = image_info.SliceOf(tex_cache_image.info, mip); slice >= 0) {
+                return {cache_image_id, mip, slice};
+            }
         }

-        // TODO: slice and mip
+        // Image isn't a subresource but a chance overlap.
         if (safe_to_delete) {
             FreeImage(cache_image_id);
         }
@@ -240,31 +237,33 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
         return {{}, -1, -1};
     } else {
         // Left overlap, the image from cache is a possible subresource of the image requested
-        if (auto mip = tex_cache_image.info.IsMipOf(image_info); mip >= 0) {
-            if (tex_cache_image.binding.is_target) {
-                // We have a larger image created and a separate one, representing a subres of it,
-                // bound as render target. In this case we need to rebind render target.
-                tex_cache_image.binding.needs_rebind = 1u;
-                if (merged_image_id) {
-                    GetImage(merged_image_id).binding.is_target = 1u;
-                }
-                FreeImage(cache_image_id);
-                return {merged_image_id, -1, -1};
-            }
-
-            // We need to have a larger, already allocated image to copy this one into
-            if (merged_image_id) {
-                tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
-                                        vk::AccessFlagBits2::eTransferRead, {});
-                const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
-                ASSERT(num_mips_to_copy == 1);
-                auto& merged_image = slot_images[merged_image_id];
-                merged_image.CopyMip(tex_cache_image, mip);
-                FreeImage(cache_image_id);
+        if (auto mip = tex_cache_image.info.MipOf(image_info); mip >= 0) {
+            if (auto slice = tex_cache_image.info.SliceOf(image_info, mip); slice >= 0) {
+                if (tex_cache_image.binding.is_target) {
+                    // We have a larger image created and a separate one, representing a subres of
+                    // it, bound as render target. In this case we need to rebind render target.
+                    tex_cache_image.binding.needs_rebind = 1u;
+                    if (merged_image_id) {
+                        GetImage(merged_image_id).binding.is_target = 1u;
+                    }
+                    FreeImage(cache_image_id);
+                    return {merged_image_id, -1, -1};
+                }
+
+                // We need to have a larger, already allocated image to copy this one into
+                if (merged_image_id) {
+                    tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
+                                            vk::AccessFlagBits2::eTransferRead, {});
+                    const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
+                    ASSERT(num_mips_to_copy == 1);
+                    auto& merged_image = slot_images[merged_image_id];
+                    merged_image.CopyMip(tex_cache_image, mip, slice);
+                    FreeImage(cache_image_id);
+                }
             }
         }
     }
@@ -374,12 +373,16 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
         RegisterImage(image_id);
     }

+    Image& image = slot_images[image_id];
+    image.tick_accessed_last = scheduler.CurrentTick();
+
+    // If the image requested is a subresource of the image from cache record its location.
     if (view_mip > 0) {
         desc.view_info.range.base.level = view_mip;
     }
+    if (view_slice > 0) {
+        desc.view_info.range.base.layer = view_slice;
+    }

-    Image& image = slot_images[image_id];
-    image.tick_accessed_last = scheduler.CurrentTick();
-
     return image_id;
 }
@@ -526,7 +529,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
         }

         image_copy.push_back({
-            .bufferOffset = mip.offset * num_layers,
+            .bufferOffset = mip.offset,
             .bufferRowLength = static_cast<u32>(mip.pitch),
             .bufferImageHeight = static_cast<u32>(mip.height),
             .imageSubresource{

View file

@@ -279,8 +279,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
         ASSERT(info.resources.levels <= 14);
         std::memset(&params.sizes, 0, sizeof(params.sizes));
         for (int m = 0; m < info.resources.levels; ++m) {
-            params.sizes[m] = info.mips_layout[m].size * info.resources.layers +
-                              (m > 0 ? params.sizes[m - 1] : 0);
+            params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0);
         }
     }