diff --git a/src/core/libraries/playgo/playgo.cpp b/src/core/libraries/playgo/playgo.cpp index ade2ee496..aec32e139 100644 --- a/src/core/libraries/playgo/playgo.cpp +++ b/src/core/libraries/playgo/playgo.cpp @@ -137,9 +137,6 @@ s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle, s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) { - LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, - *chunkIds, numberOfEntries); - if (handle != PlaygoHandle) { return ORBIS_PLAYGO_ERROR_BAD_HANDLE; } @@ -149,6 +146,10 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh if (numberOfEntries == 0) { return ORBIS_PLAYGO_ERROR_BAD_SIZE; } + + LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, + *chunkIds, numberOfEntries); + if (!playgo) { return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 017b77cdb..98d587e00 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -57,15 +57,25 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1 } u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { - static constexpr u64 MinSizeToClamp = 1_GB; + static constexpr u64 MinSizeToClamp = 512_MB; // Dont bother with clamping if the size is small so we dont pay a map lookup on every buffer. if (size < MinSizeToClamp) { return size; } - const auto vma = FindVMA(virtual_addr); + + // Clamp size to the remaining size of the current VMA. 
+ auto vma = FindVMA(virtual_addr); ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr); - const u64 clamped_size = - std::min(size, vma->second.base + vma->second.size - virtual_addr); + u64 clamped_size = vma->second.base + vma->second.size - virtual_addr; + ++vma; + + // Keep adding to the size while there is contiguous virtual address space. + while (!vma->second.IsFree() && clamped_size < size) { + clamped_size += vma->second.size; + ++vma; + } + clamped_size = std::min(clamped_size, size); + if (size != clamped_size) { LOG_WARNING(Kernel_Vmm, "Clamped requested buffer range addr={:#x}, size={:#x} to {:#x}", virtual_addr, size, clamped_size); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2a5b9335e..6b0d7228b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -369,7 +369,12 @@ void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo& LOG_WARNING(Render_Vulkan, "Unknown FP denorm mode {}", u32(fp_denorm_mode)); } const auto fp_round_mode = runtime_info.fp_round_mode32; - if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) { + if (fp_round_mode == AmdGpu::FpRoundMode::ToZero) { + if (profile.support_fp32_round_to_zero) { + ctx.AddCapability(spv::Capability::RoundingModeRTZ); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTZ, 32U); + } + } else if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) { LOG_WARNING(Render_Vulkan, "Unknown FP rounding mode {}", u32(fp_round_mode)); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp index e74044f63..48aa9f870 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp @@ -13,8 +13,7 @@ constexpr u32 
SPIRV_VERSION_1_5 = 0x00010500; struct QuadRectListEmitter : public Sirit::Module { explicit QuadRectListEmitter(const FragmentRuntimeInfo& fs_info_) - : Sirit::Module{SPIRV_VERSION_1_5}, fs_info{fs_info_}, inputs{fs_info_.num_inputs}, - outputs{fs_info_.num_inputs} { + : Sirit::Module{SPIRV_VERSION_1_5}, fs_info{fs_info_} { void_id = TypeVoid(); bool_id = TypeBool(); float_id = TypeFloat(32); @@ -253,15 +252,16 @@ private: } else { gl_per_vertex = AddOutput(gl_per_vertex_type); } + outputs.reserve(fs_info.num_inputs); for (int i = 0; i < fs_info.num_inputs; i++) { const auto& input = fs_info.inputs[i]; if (input.IsDefault()) { continue; } - outputs[i] = AddOutput(model == spv::ExecutionModel::TessellationControl - ? TypeArray(vec4_id, Int(4)) - : vec4_id); - Decorate(outputs[i], spv::Decoration::Location, input.param_index); + outputs.emplace_back(AddOutput(model == spv::ExecutionModel::TessellationControl + ? TypeArray(vec4_id, Int(4)) + : vec4_id)); + Decorate(outputs.back(), spv::Decoration::Location, input.param_index); } } @@ -276,13 +276,14 @@ private: const Id gl_per_vertex_array{TypeArray(gl_per_vertex_type, Constant(uint_id, 32U))}; gl_in = AddInput(gl_per_vertex_array); const Id float_arr{TypeArray(vec4_id, Int(32))}; + inputs.reserve(fs_info.num_inputs); for (int i = 0; i < fs_info.num_inputs; i++) { const auto& input = fs_info.inputs[i]; if (input.IsDefault()) { continue; } - inputs[i] = AddInput(float_arr); - Decorate(inputs[i], spv::Decoration::Location, input.param_index); + inputs.emplace_back(AddInput(float_arr)); + Decorate(inputs.back(), spv::Decoration::Location, input.param_index); } } @@ -334,4 +335,4 @@ std::vector EmitAuxilaryTessShader(AuxShaderType type, const FragmentRuntim return ctx.Assemble(); } -} // namespace Shader::Backend::SPIRV \ No newline at end of file +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp 
b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp index 0ee52cf19..baf6ad0d1 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -43,6 +43,10 @@ static void EmitBarrierInBlock(IR::Block* block) { action = BarrierAction::BarrierOnRead; } } + if (action != BarrierAction::None) { + IR::IREmitter ir{*block, --block->end()}; + ir.Barrier(); + } } // Inserts a barrier after divergent conditional blocks to avoid undefined diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 43d2b87d4..1ceaea664 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -21,6 +21,7 @@ struct Profile { bool support_separate_rounding_mode{}; bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_flush{}; + bool support_fp32_round_to_zero{}; bool support_explicit_workgroup_layout{}; bool support_legacy_vertex_attributes{}; bool supports_image_load_store_lod{}; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 2f40d4136..246c8c947 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -93,17 +93,14 @@ void Liverpool::Process(std::stop_token stoken) { // Process incoming commands with high priority while (num_commands) { - Common::UniqueFunction callback{}; { std::unique_lock lk{submit_mutex}; - callback = std::move(command_queue.back()); + callback = std::move(command_queue.front()); command_queue.pop(); + --num_commands; } - callback(); - - --num_commands; } curr_qid = (curr_qid + 1) % num_mapped_queues; @@ -395,6 +392,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); regs.index_buffer_type.raw = index_type->raw; @@ -670,6 +671,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanIsVoLabel(wait_addr) && num_submits == mapped_queues[GfxQueueId].submits.size()) { vo_port->WaitVoLabel([&] { 
return wait_reg_mem->Test(); }); + break; } while (!wait_reg_mem->Test()) { YIELD_GFX(); @@ -693,7 +695,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - offset += mip_ofs * num_layers; - if (offset + (mip_size * num_layers) > max_offset) { + offset += mip_ofs; + if (offset + mip_size > max_offset) { break; } copies.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3db22d585..4823b8ffe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -196,6 +196,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .subgroup_size = instance.SubgroupSize(), .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), + .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32), .support_explicit_workgroup_layout = true, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h index 4e9587e46..0f70312ed 100644 --- a/src/video_core/renderer_vulkan/vk_platform.h +++ b/src/video_core/renderer_vulkan/vk_platform.h @@ -3,11 +3,8 @@ #pragma once -#include -#include #include -#include "common/config.h" #include "common/logging/log.h" #include "common/types.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -33,9 +30,6 @@ concept VulkanHandleType = vk::isVulkanHandleType::value; template void SetObjectName(vk::Device device, const HandleType& handle, std::string_view debug_name) { - if (!Config::getVkHostMarkersEnabled()) { - return; - } const 
vk::DebugUtilsObjectNameInfoEXT name_info = { .objectType = HandleType::objectType, .objectHandle = reinterpret_cast(static_cast(handle)), @@ -50,9 +44,6 @@ void SetObjectName(vk::Device device, const HandleType& handle, std::string_view template void SetObjectName(vk::Device device, const HandleType& handle, const char* format, const Args&... args) { - if (!Config::getVkHostMarkersEnabled()) { - return; - } const std::string debug_name = fmt::vformat(format, fmt::make_format_args(args...)); SetObjectName(device, handle, debug_name); } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 96881c564..3c85c451c 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -394,7 +394,7 @@ void Image::CopyImage(const Image& image) { vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } -void Image::CopyMip(const Image& image, u32 mip) { +void Image::CopyMip(const Image& image, u32 mip, u32 slice) { scheduler->EndRendering(); Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); @@ -407,18 +407,19 @@ void Image::CopyMip(const Image& image, u32 mip) { ASSERT(mip_w == image.info.size.width); ASSERT(mip_h == image.info.size.height); + const u32 num_layers = std::min(image.info.resources.layers, info.resources.layers); const vk::ImageCopy image_copy{ .srcSubresource{ .aspectMask = image.aspect_mask, .mipLevel = 0, .baseArrayLayer = 0, - .layerCount = image.info.resources.layers, + .layerCount = num_layers, }, .dstSubresource{ .aspectMask = image.aspect_mask, .mipLevel = mip, - .baseArrayLayer = 0, - .layerCount = info.resources.layers, + .baseArrayLayer = slice, + .layerCount = num_layers, }, .extent = {mip_w, mip_h, mip_d}, }; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index b04fd188c..66d65ceec 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h 
@@ -104,7 +104,7 @@ struct Image { void Upload(vk::Buffer buffer, u64 offset); void CopyImage(const Image& image); - void CopyMip(const Image& image, u32 mip); + void CopyMip(const Image& src_image, u32 mip, u32 slice); bool IsTracked() { return track_addr != 0 && track_addr_end != 0; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 852ade1f0..60c52c666 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -208,15 +208,14 @@ void ImageInfo::UpdateSize() { mip_info.pitch = std::max(mip_info.pitch * 4, 32u); mip_info.height = std::max(mip_info.height * 4, 32u); } - mip_info.size *= mip_d; + mip_info.size *= mip_d * resources.layers; mip_info.offset = guest_size; mips_layout.emplace_back(mip_info); guest_size += mip_info.size; } - guest_size *= resources.layers; } -int ImageInfo::IsMipOf(const ImageInfo& info) const { +s32 ImageInfo::MipOf(const ImageInfo& info) const { if (!IsCompatible(info)) { return -1; } @@ -237,7 +236,12 @@ int ImageInfo::IsMipOf(const ImageInfo& info) const { // Find mip auto mip = -1; for (auto m = 0; m < info.mips_layout.size(); ++m) { - if (guest_address == (info.guest_address + info.mips_layout[m].offset)) { + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = info.mips_layout[m]; + const VAddr mip_base = info.guest_address + mip_ofs; + const VAddr mip_end = mip_base + mip_size; + const u32 slice_size = mip_size / info.resources.layers; + if (guest_address >= mip_base && guest_address < mip_end && + (guest_address - mip_base) % slice_size == 0) { mip = m; break; } @@ -246,7 +250,6 @@ int ImageInfo::IsMipOf(const ImageInfo& info) const { if (mip < 0) { return -1; } - ASSERT(mip != 0); const auto mip_w = std::max(info.size.width >> mip, 1u); const auto mip_h = std::max(info.size.height >> mip, 1u); @@ -269,7 +272,7 @@ int ImageInfo::IsMipOf(const ImageInfo& info) const { return mip; } -int ImageInfo::IsSliceOf(const 
ImageInfo& info) const { +s32 ImageInfo::SliceOf(const ImageInfo& info, s32 mip) const { if (!IsCompatible(info)) { return -1; } @@ -285,13 +288,13 @@ int ImageInfo::IsSliceOf(const ImageInfo& info) const { } // Check for size alignment. - const bool slice_size = info.guest_size / info.resources.layers; + const u32 slice_size = info.mips_layout[mip].size / info.resources.layers; if (guest_size % slice_size != 0) { return -1; } // Ensure that address is aligned too. - const auto addr_diff = guest_address - info.guest_address; + const auto addr_diff = guest_address - (info.guest_address + info.mips_layout[mip].offset); if ((addr_diff % guest_size) != 0) { return -1; } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index dad0e751e..ca4d9f5e9 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -30,8 +30,8 @@ struct ImageInfo { bool IsDepthStencil() const; bool HasStencil() const; - int IsMipOf(const ImageInfo& info) const; - int IsSliceOf(const ImageInfo& info) const; + s32 MipOf(const ImageInfo& info) const; + s32 SliceOf(const ImageInfo& info, s32 mip) const; /// Verifies if images are compatible for subresource merging. bool IsCompatible(const ImageInfo& info) const { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index ecac78847..d41ee57cc 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -223,16 +223,13 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag // Right overlap, the image requested is a possible subresource of the image from cache. 
if (image_info.guest_address > tex_cache_image.info.guest_address) { - if (auto mip = image_info.IsMipOf(tex_cache_image.info); mip >= 0) { - return {cache_image_id, mip, -1}; + if (auto mip = image_info.MipOf(tex_cache_image.info); mip >= 0) { + if (auto slice = image_info.SliceOf(tex_cache_image.info, mip); slice >= 0) { + return {cache_image_id, mip, slice}; + } } - if (auto slice = image_info.IsSliceOf(tex_cache_image.info); slice >= 0) { - return {cache_image_id, -1, slice}; - } - - // TODO: slice and mip - + // Image isn't a subresource but a chance overlap. if (safe_to_delete) { FreeImage(cache_image_id); } @@ -240,31 +237,33 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag return {{}, -1, -1}; } else { // Left overlap, the image from cache is a possible subresource of the image requested - if (auto mip = tex_cache_image.info.IsMipOf(image_info); mip >= 0) { - if (tex_cache_image.binding.is_target) { - // We have a larger image created and a separate one, representing a subres of it, - // bound as render target. In this case we need to rebind render target. - tex_cache_image.binding.needs_rebind = 1u; - if (merged_image_id) { - GetImage(merged_image_id).binding.is_target = 1u; + if (auto mip = tex_cache_image.info.MipOf(image_info); mip >= 0) { + if (auto slice = tex_cache_image.info.SliceOf(image_info, mip); slice >= 0) { + if (tex_cache_image.binding.is_target) { + // We have a larger image created and a separate one, representing a subres of + // it, bound as render target. In this case we need to rebind render target. 
+ tex_cache_image.binding.needs_rebind = 1u; + if (merged_image_id) { + GetImage(merged_image_id).binding.is_target = 1u; + } + + FreeImage(cache_image_id); + return {merged_image_id, -1, -1}; } - FreeImage(cache_image_id); - return {merged_image_id, -1, -1}; - } + // We need to have a larger, already allocated image to copy this one into + if (merged_image_id) { + tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits2::eTransferRead, {}); - // We need to have a larger, already allocated image to copy this one into - if (merged_image_id) { - tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, - vk::AccessFlagBits2::eTransferRead, {}); + const auto num_mips_to_copy = tex_cache_image.info.resources.levels; + ASSERT(num_mips_to_copy == 1); - const auto num_mips_to_copy = tex_cache_image.info.resources.levels; - ASSERT(num_mips_to_copy == 1); + auto& merged_image = slot_images[merged_image_id]; + merged_image.CopyMip(tex_cache_image, mip, slice); - auto& merged_image = slot_images[merged_image_id]; - merged_image.CopyMip(tex_cache_image, mip); - - FreeImage(cache_image_id); + FreeImage(cache_image_id); + } } } } @@ -374,12 +373,16 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { RegisterImage(image_id); } + Image& image = slot_images[image_id]; + image.tick_accessed_last = scheduler.CurrentTick(); + + // If the image requested is a subresource of the image from cache record its location. 
if (view_mip > 0) { desc.view_info.range.base.level = view_mip; } - - Image& image = slot_images[image_id]; - image.tick_accessed_last = scheduler.CurrentTick(); + if (view_slice > 0) { + desc.view_info.range.base.layer = view_slice; + } return image_id; } @@ -526,7 +529,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule } image_copy.push_back({ - .bufferOffset = mip.offset * num_layers, + .bufferOffset = mip.offset, .bufferRowLength = static_cast(mip.pitch), .bufferImageHeight = static_cast(mip.height), .imageSubresource{ diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index ede91d128..d7fc54338 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -279,8 +279,7 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o ASSERT(info.resources.levels <= 14); std::memset(¶ms.sizes, 0, sizeof(params.sizes)); for (int m = 0; m < info.resources.levels; ++m) { - params.sizes[m] = info.mips_layout[m].size * info.resources.layers + - (m > 0 ? params.sizes[m - 1] : 0); + params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0); } }