From a3bbf2274f5fad80f72d38d388c2f9a26e9adf3f Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 30 Apr 2025 11:39:38 -0700 Subject: [PATCH 01/25] fix: Mistake in store bounds check index. --- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e4071bb95..83e8afd78 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -529,7 +529,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto // Bounds checking enabled, wrap in a conditional branch. auto compare_index = index; if (N > 1) { - index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1)); + compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1)); } const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size); const Id in_bounds_label = ctx.OpLabel(); From c08f92aca1a3b0cef6e9da8d5da2d689b34fff4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Wed, 30 Apr 2025 20:42:08 +0200 Subject: [PATCH 02/25] Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32bit floats (#2820) * Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32bit floats * Handle missing VK_EXT_shader_atomic_float2 --- externals/sirit | 2 +- .../backend/spirv/emit_spirv.cpp | 4 ++ .../backend/spirv/emit_spirv_atomic.cpp | 42 +++++++++++++++++++ .../backend/spirv/emit_spirv_instructions.h | 2 + .../backend/spirv/spirv_emit_context.cpp | 1 + .../backend/spirv/spirv_emit_context.h | 1 + .../frontend/translate/vector_memory.cpp | 8 ++++ src/shader_recompiler/info.h | 1 + src/shader_recompiler/ir/ir_emitter.cpp | 10 +++++ src/shader_recompiler/ir/ir_emitter.h | 4 ++ src/shader_recompiler/ir/opcodes.inc | 2 + 
.../ir/passes/shader_info_collection_pass.cpp | 4 ++ src/shader_recompiler/profile.h | 1 + .../renderer_vulkan/vk_instance.cpp | 17 +++++++- src/video_core/renderer_vulkan/vk_instance.h | 8 ++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 16 files changed, 106 insertions(+), 2 deletions(-) diff --git a/externals/sirit b/externals/sirit index 427a42c9e..09a1416ab 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1 +Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 936f82cd6..ff38bb5d8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } + if (info.uses_atomic_float_min_max) { + ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max"); + ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT); + } if (info.uses_lane_id) { ctx.AddCapability(spv::Capability::GroupNonUniform); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 211899714..c3799fb4b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } + +Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id 
pointer{ctx.OpImageTexelPointer(ctx.image_f32, texture.id, coords, ctx.ConstU32(0U))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.F32[1], pointer, scope, semantics, value); +} } // Anonymous namespace Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { @@ -187,6 +195,40 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax); } +Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { + if (ctx.profile.supports_image_fp32_atomic_min_max) { + return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax); + } + + const auto u32_value = ctx.OpBitcast(ctx.U32[1], value); + const auto sign_bit_set = + ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u)); + + const auto result = ctx.OpSelect( + ctx.F32[1], sign_bit_set, + EmitBitCastF32U32(ctx, EmitImageAtomicUMin32(ctx, inst, handle, coords, u32_value)), + EmitBitCastF32U32(ctx, EmitImageAtomicSMax32(ctx, inst, handle, coords, u32_value))); + + return result; +} + +Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { + if (ctx.profile.supports_image_fp32_atomic_min_max) { + return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin); + } + + const auto u32_value = ctx.OpBitcast(ctx.U32[1], value); + const auto sign_bit_set = + ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u)); + + const auto result = ctx.OpSelect( + ctx.F32[1], sign_bit_set, + EmitBitCastF32U32(ctx, EmitImageAtomicUMax32(ctx, inst, handle, coords, u32_value)), + EmitBitCastF32U32(ctx, EmitImageAtomicSMin32(ctx, inst, handle, coords, u32_value))); + + return result; +} + Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { // TODO: This is not yet implemented throw 
NotImplementedException("SPIR-V Instruction"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 079f1005d..269f372d5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 8433251ff..2640030df 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() { } if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) { image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + image_f32 = TypePointer(spv::StorageClass::Image, F32[1]); } if (info.samplers.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 784748658..38d55e0e4 100644 --- 
a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -207,6 +207,7 @@ public: Id invocation_id{}; Id subgroup_local_invocation_id{}; Id image_u32{}; + Id image_f32{}; Id shared_memory_u8{}; Id shared_memory_u16{}; diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index ed7788d8c..cfc01c58f 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -115,8 +115,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return IMAGE_ATOMIC(AtomicOp::Smin, inst); case Opcode::IMAGE_ATOMIC_UMIN: return IMAGE_ATOMIC(AtomicOp::Umin, inst); + case Opcode::IMAGE_ATOMIC_FMIN: + return IMAGE_ATOMIC(AtomicOp::Fmin, inst); case Opcode::IMAGE_ATOMIC_SMAX: return IMAGE_ATOMIC(AtomicOp::Smax, inst); + case Opcode::IMAGE_ATOMIC_FMAX: + return IMAGE_ATOMIC(AtomicOp::Fmax, inst); case Opcode::IMAGE_ATOMIC_UMAX: return IMAGE_ATOMIC(AtomicOp::Umax, inst); case Opcode::IMAGE_ATOMIC_AND: @@ -466,6 +470,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { return ir.ImageAtomicIMax(handle, body, value, true, info); case AtomicOp::Umax: return ir.ImageAtomicUMax(handle, body, value, info); + case AtomicOp::Fmax: + return ir.ImageAtomicFMax(handle, body, value, info); + case AtomicOp::Fmin: + return ir.ImageAtomicFMin(handle, body, value, info); case AtomicOp::And: return ir.ImageAtomicAnd(handle, body, value, info); case AtomicOp::Or: diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 8dcf9c5c4..784f8b4d2 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -196,6 +196,7 @@ struct Info { bool has_discard{}; bool has_image_gather{}; bool has_image_query{}; + bool uses_atomic_float_min_max{}; bool uses_lane_id{}; bool uses_group_quad{}; bool uses_group_ballot{}; diff --git 
a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e1ebf2206..01d945178 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value); } +Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + return Inst(Opcode::ImageAtomicFMax32, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + return Inst(Opcode::ImageAtomicFMin32, Flags{info}, handle, coords, value); +} + Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, bool is_signed, TextureInstInfo info) { return is_signed ? ImageAtomicSMax(handle, coords, value, info) diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index d978b3b4f..8f8a12736 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -321,6 +321,10 @@ public: const Value& value, TextureInstInfo info); [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, bool is_signed, TextureInstInfo info); [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 
6f186808c..ab6dbfde9 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, ) +OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, ) OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index d739b2da5..f53a0f4d4 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) { case IR::Opcode::ImageQueryLod: info.has_image_query = true; break; + case IR::Opcode::ImageAtomicFMax32: + case IR::Opcode::ImageAtomicFMin32: + info.uses_atomic_float_min_max = true; + break; case IR::Opcode::LaneId: info.uses_lane_id = true; break; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9aac6230a..853e4854d 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -29,6 +29,7 @@ struct Profile { bool supports_native_cube_calc{}; bool supports_trinary_minmax{}; bool supports_robust_buffer_access{}; + bool supports_image_fp32_atomic_min_max{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 072807124..99f225d79 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -210,7 +210,8 @@ bool 
Instance::CreateDevice() { vk::PhysicalDeviceRobustness2FeaturesEXT, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, - vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); + vk::PhysicalDevicePortabilitySubsetFeaturesKHR, + vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>(); features = feature_chain.get().features; const vk::StructureChain properties_chain = physical_device.getProperties2< @@ -272,6 +273,13 @@ bool Instance::CreateDevice() { image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME); amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME); + shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME); + if (shader_atomic_float2) { + shader_atomic_float2_features = + feature_chain.get(); + LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}", + shader_atomic_float2_features.shaderImageFloat32AtomicMinMax); + } const bool calibrated_timestamps = TRACY_GPU_ENABLED ? 
add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false; @@ -401,6 +409,10 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{ .legacyVertexAttributes = true, }, + vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{ + .shaderImageFloat32AtomicMinMax = + shader_atomic_float2_features.shaderImageFloat32AtomicMinMax, + }, #ifdef __APPLE__ portability_features, #endif @@ -430,6 +442,9 @@ bool Instance::CreateDevice() { if (!legacy_vertex_attributes) { device_chain.unlink(); } + if (!shader_atomic_float2) { + device_chain.unlink(); + } auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get()); if (device_result != vk::Result::eSuccess) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index bf9af1f24..573473869 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -165,6 +165,12 @@ public: return amd_shader_trinary_minmax; } + /// Returns true when the shaderImageFloat32AtomicMinMax feature of + /// VK_EXT_shader_atomic_float2 is supported. 
+ bool IsShaderAtomicFloatImage32MinMaxSupported() const { + return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -336,6 +342,7 @@ private: vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features; vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features; vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features; + vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features; vk::DriverIdKHR driver_id; vk::UniqueDebugUtilsMessengerEXT debug_callback{}; std::string vendor_name; @@ -360,6 +367,7 @@ private: bool image_load_store_lod{}; bool amd_gcn_shader{}; bool amd_shader_trinary_minmax{}; + bool shader_atomic_float2{}; bool portability_subset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b991cda0..0a0c81d4c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -206,6 +206,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(), .supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(), + .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || From ede60e8f7f08a6930fe5cec9c77fa42ab5cafb95 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 30 Apr 2025 11:43:51 -0700 Subject: [PATCH 03/25] 
fix: Do not declare atomic float capability when not supported. --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ff38bb5d8..9ebb842cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -270,7 +270,7 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } - if (info.uses_atomic_float_min_max) { + if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) { ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max"); ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT); } From c47d9b2ad6e890dd3c1b9361f0cf25119e5247e7 Mon Sep 17 00:00:00 2001 From: anna12831920 <204280147+anna12831920@users.noreply.github.com> Date: Thu, 1 May 2025 07:56:44 +1200 Subject: [PATCH 04/25] Export eboot address (#2866) --- src/common/memory_patcher.cpp | 2 +- src/common/memory_patcher.h | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/common/memory_patcher.cpp b/src/common/memory_patcher.cpp index bb2d23c45..cb51828cc 100644 --- a/src/common/memory_patcher.cpp +++ b/src/common/memory_patcher.cpp @@ -23,7 +23,7 @@ namespace MemoryPatcher { -uintptr_t g_eboot_address; +EXPORT uintptr_t g_eboot_address; uint64_t g_eboot_image_size; std::string g_game_serial; std::string patchFile; diff --git a/src/common/memory_patcher.h b/src/common/memory_patcher.h index 29045a6a2..968903a85 100644 --- a/src/common/memory_patcher.h +++ b/src/common/memory_patcher.h @@ -6,9 +6,15 @@ #include #include +#if defined(WIN32) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT __attribute__((visibility("default"))) +#endif + namespace MemoryPatcher { -extern uintptr_t g_eboot_address; +extern 
EXPORT uintptr_t g_eboot_address; extern uint64_t g_eboot_image_size; extern std::string g_game_serial; extern std::string patchFile; From 10b24d04bc3d3f09b11ccf5e1b1ad328cc6395e3 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:54:45 -0700 Subject: [PATCH 05/25] fix: Add new image atomic instructions to relevant lists. --- src/shader_recompiler/ir/microinstruction.cpp | 2 ++ src/shader_recompiler/ir/passes/resource_tracking_pass.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 580156f5b..a57310fb9 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -94,6 +94,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::ImageAtomicUMin32: case Opcode::ImageAtomicSMax32: case Opcode::ImageAtomicUMax32: + case Opcode::ImageAtomicFMax32: + case Opcode::ImageAtomicFMin32: case Opcode::ImageAtomicInc32: case Opcode::ImageAtomicDec32: case Opcode::ImageAtomicAnd32: diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 778da149f..1de255e4d 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -101,6 +101,8 @@ bool IsImageAtomicInstruction(const IR::Inst& inst) { case IR::Opcode::ImageAtomicUMin32: case IR::Opcode::ImageAtomicSMax32: case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicFMax32: + case IR::Opcode::ImageAtomicFMin32: case IR::Opcode::ImageAtomicInc32: case IR::Opcode::ImageAtomicDec32: case IR::Opcode::ImageAtomicAnd32: From 5fd5b6253997f9fbfe67ed2e866eb5dfbac772ac Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 30 Apr 2025 20:46:16 -0700 Subject: [PATCH 06/25] shader_recompiler: Few fixes for 
buffer number conversions. (#2869) * liverpool: Pass correct color buffer number type for conversion mapping. * shader_recompiler: Apply number conversion to vertex inputs. --- .../frontend/translate/translate.cpp | 4 +++- src/video_core/amdgpu/liverpool.h | 15 +++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index c5a5814a4..e49f95d9a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -517,7 +517,9 @@ void Translator::EmitFetch(const GcnInst& inst) { const auto values = ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); - const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); + const auto converted = + IR::ApplyReadNumberConversionVec4(ir, values, buffer.GetNumberConversion()); + const auto swizzled = ApplySwizzle(ir, converted, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 8f9292f1c..5928a6313 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -924,15 +924,11 @@ struct Liverpool { } [[nodiscard]] NumberFormat GetNumberFmt() const { - // There is a small difference between T# and CB number types, account for it. - return RemapNumberFormat(info.number_type == NumberFormat::SnormNz - ? 
NumberFormat::Srgb - : info.number_type.Value(), - info.format); + return RemapNumberFormat(GetFixedNumberFormat(), info.format); } [[nodiscard]] NumberConversion GetNumberConversion() const { - return MapNumberConversion(info.number_type); + return MapNumberConversion(GetFixedNumberFormat()); } [[nodiscard]] CompMapping Swizzle() const { @@ -973,6 +969,13 @@ struct Liverpool { const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; return RemapSwizzle(info.format, mrt_swizzle); } + + private: + [[nodiscard]] NumberFormat GetFixedNumberFormat() const { + // There is a small difference between T# and CB number types, account for it. + return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb + : info.number_type.Value(); + } }; enum ContextRegs : u32 { From 4202d9d621fea5fd5686656b296b97d64e3f3582 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 30 Apr 2025 23:37:37 -0700 Subject: [PATCH 07/25] fix: Add missing ctime includes. 
--- src/core/devices/random_device.cpp | 1 + src/core/devices/srandom_device.cpp | 1 + src/core/devices/urandom_device.cpp | 1 + src/core/devtools/widget/frame_dump.cpp | 1 + 4 files changed, 4 insertions(+) diff --git a/src/core/devices/random_device.cpp b/src/core/devices/random_device.cpp index 50934e3b8..b2754fe58 100644 --- a/src/core/devices/random_device.cpp +++ b/src/core/devices/random_device.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/logging/log.h" #include "random_device.h" diff --git a/src/core/devices/srandom_device.cpp b/src/core/devices/srandom_device.cpp index ab78ddbe2..5e51b1c39 100644 --- a/src/core/devices/srandom_device.cpp +++ b/src/core/devices/srandom_device.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/logging/log.h" #include "srandom_device.h" diff --git a/src/core/devices/urandom_device.cpp b/src/core/devices/urandom_device.cpp index c001aab83..7318a6ff7 100644 --- a/src/core/devices/urandom_device.cpp +++ b/src/core/devices/urandom_device.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/logging/log.h" #include "urandom_device.h" diff --git a/src/core/devtools/widget/frame_dump.cpp b/src/core/devtools/widget/frame_dump.cpp index 646ccb6d6..2445bdcb5 100644 --- a/src/core/devtools/widget/frame_dump.cpp +++ b/src/core/devtools/widget/frame_dump.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include From b0e4e87ff3f7206df63988eab7593163253d987b Mon Sep 17 00:00:00 2001 From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com> Date: Thu, 1 May 2025 12:12:15 +0300 Subject: [PATCH 08/25] Implement SnormNz conversion (#2841) * + * + * Unpack Snorm 2x16 * + * SintToSnormNz * all is broken ig.... * review changes * my stupid ass messed all while trying to resolve the conflicts.. 
* + * + * fix rebase * clang-format fix (1) * clang-format fix (2) --------- Co-authored-by: squidbus <175574877+squidbus@users.noreply.github.com> --- CMakeLists.txt | 0 .../ir/passes/lower_buffer_format_to_raw.cpp | 3 ++- src/shader_recompiler/ir/reinterpret.h | 26 +++++++++++++++++++ src/video_core/amdgpu/liverpool.h | 2 +- src/video_core/amdgpu/resource.h | 4 +-- src/video_core/amdgpu/types.h | 21 +++++++++++++-- 6 files changed, 50 insertions(+), 6 deletions(-) mode change 100755 => 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100755 new mode 100644 diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp index 658a495bc..65be02541 100644 --- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp +++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp @@ -206,7 +206,8 @@ static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) .swizzle = is_inst_typed ? AmdGpu::RemapSwizzle(flags.inst_data_fmt.Value(), AmdGpu::IdentityMapping) : buffer.DstSelect(), - .num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value()) + .num_conversion = is_inst_typed ? 
AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value(), + flags.inst_data_fmt.Value()) : buffer.GetNumberConversion(), .num_components = AmdGpu::NumComponents(data_format), }; diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h index b65b19928..99819cbb9 100644 --- a/src/shader_recompiler/ir/reinterpret.h +++ b/src/shader_recompiler/ir/reinterpret.h @@ -34,6 +34,18 @@ inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value, case AmdGpu::NumberConversion::UnormToUbnorm: // Convert 0...1 to -1...1 return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f)); + case AmdGpu::NumberConversion::Sint8ToSnormNz: { + const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast(value), ir.Imm32(2)), ir.Imm32(1)); + const IR::F32 left = ir.ConvertSToF(32, 32, additon); + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + return ir.FPDiv(left, max); + } + case AmdGpu::NumberConversion::Sint16ToSnormNz: { + const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast(value), ir.Imm32(2)), ir.Imm32(1)); + const IR::F32 left = ir.ConvertSToF(32, 32, additon); + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + return ir.FPDiv(left, max); + } default: UNREACHABLE(); } @@ -66,6 +78,20 @@ inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value, case AmdGpu::NumberConversion::UnormToUbnorm: // Convert -1...1 to 0...1 return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f)); + case AmdGpu::NumberConversion::Sint8ToSnormNz: { + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max); + const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f)); + const IR::U32 raw = ir.ConvertFToS(32, ir.FPDiv(left, ir.Imm32(2.f))); + return ir.BitCast(raw); + } + case AmdGpu::NumberConversion::Sint16ToSnormNz: { + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + const IR::F32 mul = ir.FPMul(ir.FPClamp(value, 
ir.Imm32(-1.f), ir.Imm32(1.f)), max); + const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f)); + const IR::U32 raw = ir.ConvertFToS(32, ir.FPDiv(left, ir.Imm32(2.f))); + return ir.BitCast(raw); + } default: UNREACHABLE(); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 5928a6313..c4bebd05f 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -928,7 +928,7 @@ struct Liverpool { } [[nodiscard]] NumberConversion GetNumberConversion() const { - return MapNumberConversion(GetFixedNumberFormat()); + return MapNumberConversion(GetFixedNumberFormat(), info.format); } [[nodiscard]] CompMapping Swizzle() const { diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 64a85c812..c387c7bf2 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -68,7 +68,7 @@ struct Buffer { } NumberConversion GetNumberConversion() const noexcept { - return MapNumberConversion(NumberFormat(num_format)); + return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -292,7 +292,7 @@ struct Image { } NumberConversion GetNumberConversion() const noexcept { - return MapNumberConversion(NumberFormat(num_format)); + return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index d1cf19076..ab0df689e 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -197,6 +197,8 @@ enum class NumberConversion : u32 { UintToUscaled = 1, SintToSscaled = 2, UnormToUbnorm = 3, + Sint8ToSnormNz = 5, + Sint16ToSnormNz = 6, }; struct CompMapping { @@ -287,6 +289,7 @@ inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataForma case NumberFormat::Uscaled: return NumberFormat::Uint; case NumberFormat::Sscaled: + case NumberFormat::SnormNz: return 
NumberFormat::Sint; case NumberFormat::Ubnorm: return NumberFormat::Unorm; @@ -336,14 +339,28 @@ inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizz } } -inline NumberConversion MapNumberConversion(const NumberFormat format) { - switch (format) { +inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) { + switch (num_fmt) { case NumberFormat::Uscaled: return NumberConversion::UintToUscaled; case NumberFormat::Sscaled: return NumberConversion::SintToSscaled; case NumberFormat::Ubnorm: return NumberConversion::UnormToUbnorm; + case NumberFormat::SnormNz: { + switch (data_fmt) { + case DataFormat::Format8: + case DataFormat::Format8_8: + case DataFormat::Format8_8_8_8: + return NumberConversion::Sint8ToSnormNz; + case DataFormat::Format16: + case DataFormat::Format16_16: + case DataFormat::Format16_16_16_16: + return NumberConversion::Sint16ToSnormNz; + default: + UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt)); + } + } default: return NumberConversion::None; } From 6c39bf229c006b76ad43f5c79e527ff383f4dc80 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Thu, 1 May 2025 06:47:43 -0500 Subject: [PATCH 09/25] libkernel: Various filesystem fixes (#2868) * Proper handling of whence 3 & 4 * Accurate directory handling in open Directories can be opened, and can be created in open, these changes should handle that more accurately. * Mount /app0 as read only On real hardware, it's read only. * Proper directory flag handling. Even when directory is specified, it will still succeed to open non-directories. * Check for read only directories * Earlier ro check in posix_rmdir Hardware tests suggest these checks are in a different order * Clear temp folder on boot My tests rely on this, and some games do too. 
Two birds with one stone * Clang * Add missing DeleteHandle calls Whoops * Final flags adjustment in sceKernelOpen All my current tests are now hardware accurate. * Fix truncates Host ftruncate consistently fails on EINVAL, I'll need to test if this issue affected Windows too. * Windows hacks Windows is more limiting about how folders are opened and things like that. For now, pretend these calls didn't error. Also fixes compilation for Windows * Final touch-ups After expanding my test suite further, I found a couple more edge cases that needed addressing. Bloodborne audio is still broken, I'll look into that soon. * Remove hacky read-only behavior in posix_stat Bloodborne apparently uses the mode parameter here when querying its audio files, and the mode we returned led to it disabling audio entirely. * Clang * Cleaner code * Combine fsync and sync flags According to FreeBSD docs, the "sync" flag is synonymous with the fsync flag, and is only included to meet the POSIX spec. * Log if any currently unhandled flags are encountered. These are rare and probably not too important, but log a warning when they're seen. * Update file_system.cpp * Update file_system.cpp * Clang * Revert truncate fix Using ftruncate works fine after moving the call to before the proper file opening code. * Truncate before open Open the file as read-write, then try truncating. This fixes read | truncate flag behavior on Windows. * Slightly adjust check for invalid flags Any open call with invalid flags should return EINVAL, regardless of other errors the parameters might cause.
--- src/common/io_file.cpp | 4 +- src/common/io_file.h | 2 - src/core/libraries/kernel/file_system.cpp | 198 ++++++++++++++-------- src/emulator.cpp | 14 +- 4 files changed, 133 insertions(+), 85 deletions(-) diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp index 3efadc6ea..6fa9062a7 100644 --- a/src/common/io_file.cpp +++ b/src/common/io_file.cpp @@ -131,9 +131,7 @@ namespace { case SeekOrigin::End: return SEEK_END; default: - LOG_ERROR(Common_Filesystem, "Unsupported origin {}, defaulting to SEEK_SET", - static_cast(origin)); - return SEEK_SET; + UNREACHABLE_MSG("Impossible SeekOrigin {}", static_cast(origin)); } } diff --git a/src/common/io_file.h b/src/common/io_file.h index fb20a2bc5..45787a092 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -61,8 +61,6 @@ enum class SeekOrigin : u32 { SetOrigin, // Seeks from the start of the file. CurrentPosition, // Seeks from the current file pointer position. End, // Seeks from the end of the file. - SeekHole, // Seeks from the start of the next hole in the file. - SeekData, // Seeks from the start of the next non-hole region in the file. }; class IOFile final { diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index bcfa15a62..cb1fd14a2 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -67,10 +67,16 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { bool write = (flags & 0x3) == ORBIS_KERNEL_O_WRONLY; bool rdwr = (flags & 0x3) == ORBIS_KERNEL_O_RDWR; + if (!read && !write && !rdwr) { + // Start by checking for invalid flags. 
+ *__Error() = POSIX_EINVAL; + return -1; + } + bool nonblock = (flags & ORBIS_KERNEL_O_NONBLOCK) != 0; bool append = (flags & ORBIS_KERNEL_O_APPEND) != 0; - bool fsync = (flags & ORBIS_KERNEL_O_FSYNC) != 0; - bool sync = (flags & ORBIS_KERNEL_O_SYNC) != 0; + // Flags fsync and sync behave the same + bool sync = (flags & ORBIS_KERNEL_O_SYNC) != 0 || (flags & ORBIS_KERNEL_O_FSYNC) != 0; bool create = (flags & ORBIS_KERNEL_O_CREAT) != 0; bool truncate = (flags & ORBIS_KERNEL_O_TRUNC) != 0; bool excl = (flags & ORBIS_KERNEL_O_EXCL) != 0; @@ -78,6 +84,10 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { bool direct = (flags & ORBIS_KERNEL_O_DIRECT) != 0; bool directory = (flags & ORBIS_KERNEL_O_DIRECTORY) != 0; + if (sync || direct || dsync || nonblock) { + LOG_WARNING(Kernel_Fs, "flags {:#x} not fully handled", flags); + } + std::string_view path{raw_path}; u32 handle = h->CreateHandle(); auto* file = h->GetFile(handle); @@ -94,84 +104,126 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { } } - if (directory) { - file->type = Core::FileSys::FileType::Directory; - file->m_guest_name = path; - file->m_host_name = mnt->GetHostPath(file->m_guest_name); - if (!std::filesystem::is_directory(file->m_host_name)) { // directory doesn't exist + bool read_only = false; + file->m_guest_name = path; + file->m_host_name = mnt->GetHostPath(file->m_guest_name, &read_only); + bool exists = std::filesystem::exists(file->m_host_name); + s32 e = 0; + + if (create) { + if (excl && exists) { + // Error if file exists h->DeleteHandle(handle); - *__Error() = POSIX_ENOENT; + *__Error() = POSIX_EEXIST; + return -1; + } + + if (read_only) { + // Can't create files in a read only directory + h->DeleteHandle(handle); + *__Error() = POSIX_EROFS; + return -1; + } + // Create a file if it doesn't exist + Common::FS::IOFile out(file->m_host_name, Common::FS::FileAccessMode::Write); + } else if (!exists) { + // If we're not creating a file, and it doesn't exist, 
return ENOENT + h->DeleteHandle(handle); + *__Error() = POSIX_ENOENT; + return -1; + } + + if (std::filesystem::is_directory(file->m_host_name) || directory) { + // Directories can be opened even if the directory flag isn't set. + // In these cases, error behavior is identical to the directory code path. + directory = true; + } + + if (directory) { + if (!std::filesystem::is_directory(file->m_host_name)) { + // If the opened file is not a directory, return ENOTDIR. + // This will trigger when create & directory is specified, this is expected. + h->DeleteHandle(handle); + *__Error() = POSIX_ENOTDIR; + return -1; + } + + file->type = Core::FileSys::FileType::Directory; + + // Populate directory contents + mnt->IterateDirectory(file->m_guest_name, + [&file](const auto& ent_path, const auto ent_is_file) { + auto& dir_entry = file->dirents.emplace_back(); + dir_entry.name = ent_path.filename().string(); + dir_entry.isFile = ent_is_file; + }); + file->dirents_index = 0; + + if (read) { + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read); + } else if (write || rdwr) { + // Cannot open directories with any type of write access + h->DeleteHandle(handle); + *__Error() = POSIX_EISDIR; + return -1; + } + + if (e == EACCES) { + // Hack to bypass some platform limitations, ignore the error and continue as normal. 
+ LOG_WARNING(Kernel_Fs, "Opening directories is not fully supported on this platform"); + e = 0; + } + + if (truncate) { + // Cannot open directories with truncate + h->DeleteHandle(handle); + *__Error() = POSIX_EISDIR; return -1; - } else { - if (create) { - return handle; // dir already exists - } else { - mnt->IterateDirectory(file->m_guest_name, - [&file](const auto& ent_path, const auto ent_is_file) { - auto& dir_entry = file->dirents.emplace_back(); - dir_entry.name = ent_path.filename().string(); - dir_entry.isFile = ent_is_file; - }); - file->dirents_index = 0; - } } } else { - file->m_guest_name = path; - file->m_host_name = mnt->GetHostPath(file->m_guest_name); - bool exists = std::filesystem::exists(file->m_host_name); - int e = 0; + // Start by opening as read-write so we can truncate regardless of flags. + // Since open starts by closing the file, this won't interfere with later open calls. + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); - if (create) { - if (excl && exists) { - // Error if file exists - h->DeleteHandle(handle); - *__Error() = POSIX_EEXIST; - return -1; - } - // Create file if it doesn't exist - Common::FS::IOFile out(file->m_host_name, Common::FS::FileAccessMode::Write); - } else if (!exists) { - // File to open doesn't exist, return ENOENT + file->type = Core::FileSys::FileType::Regular; + + if (truncate && read_only) { + // Can't open files with truncate flag in a read only directory h->DeleteHandle(handle); - *__Error() = POSIX_ENOENT; + *__Error() = POSIX_EROFS; return -1; + } else if (truncate && e == 0) { + // If the file was opened successfully and truncate was enabled, reduce size to 0 + file->f.SetSize(0); } if (read) { // Read only e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read); + } else if (read_only) { + // Can't open files with write/read-write access in a read only directory + h->DeleteHandle(handle); + *__Error() = POSIX_EROFS; + return -1; + } else if (append) { + 
// Append can be specified with rdwr or write, but we treat it as a separate mode. + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); } else if (write) { // Write only - if (append) { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); - } else { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write); - } + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write); } else if (rdwr) { // Read and write - if (append) { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); - } else { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); - } - } else { - // Invalid flags - *__Error() = POSIX_EINVAL; - return -1; - } - - if (truncate && e == 0) { - // If the file was opened successfully and truncate was enabled, reduce size to 0 - file->f.SetSize(0); - } - - if (e != 0) { - // Open failed in platform-specific code, errno needs to be converted. - h->DeleteHandle(handle); - SetPosixErrno(e); - return -1; + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); } } + + if (e != 0) { + // Open failed in platform-specific code, errno needs to be converted. 
+ h->DeleteHandle(handle); + SetPosixErrno(e); + return -1; + } + file->is_opened = true; return handle; } @@ -365,10 +417,10 @@ s64 PS4_SYSV_ABI posix_lseek(s32 fd, s64 offset, s32 whence) { origin = Common::FS::SeekOrigin::CurrentPosition; } else if (whence == 2) { origin = Common::FS::SeekOrigin::End; - } else if (whence == 3) { - origin = Common::FS::SeekOrigin::SeekHole; - } else if (whence == 4) { - origin = Common::FS::SeekOrigin::SeekData; + } else if (whence == 3 || whence == 4) { + // whence parameter belongs to an unsupported POSIX extension + *__Error() = POSIX_ENOTTY; + return -1; } else { // whence parameter is invalid *__Error() = POSIX_EINVAL; @@ -486,13 +538,13 @@ s32 PS4_SYSV_ABI posix_rmdir(const char* path) { const std::filesystem::path dir_name = mnt->GetHostPath(path, &ro); - if (dir_name.empty() || !std::filesystem::is_directory(dir_name)) { - *__Error() = POSIX_ENOTDIR; + if (ro) { + *__Error() = POSIX_EROFS; return -1; } - if (ro) { - *__Error() = POSIX_EROFS; + if (dir_name.empty() || !std::filesystem::is_directory(dir_name)) { + *__Error() = POSIX_ENOTDIR; return -1; } @@ -523,8 +575,7 @@ s32 PS4_SYSV_ABI sceKernelRmdir(const char* path) { s32 PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path); auto* mnt = Common::Singleton::Instance(); - bool ro = false; - const auto path_name = mnt->GetHostPath(path, &ro); + const auto path_name = mnt->GetHostPath(path); std::memset(sb, 0, sizeof(OrbisKernelStat)); const bool is_dir = std::filesystem::is_directory(path_name); const bool is_file = std::filesystem::is_regular_file(path_name); @@ -545,9 +596,6 @@ s32 PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { sb->st_blocks = (sb->st_size + 511) / 512; // TODO incomplete } - if (ro) { - sb->st_mode &= ~0000555u; - } return ORBIS_OK; } diff --git a/src/emulator.cpp b/src/emulator.cpp index 5c20353df..448b8aad4 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -98,9 
+98,9 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector::Instance(); - mnt->Mount(game_folder, "/app0"); + mnt->Mount(game_folder, "/app0", true); // Certain games may use /hostapp as well such as CUSA001100 - mnt->Mount(game_folder, "/hostapp"); + mnt->Mount(game_folder, "/hostapp", true); auto& game_info = Common::ElfInfo::Instance(); @@ -231,11 +231,15 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorMount(mount_data_dir, "/data"); // should just exist, manually create with game serial + + // Mounting temp folders const auto& mount_temp_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id; - if (!std::filesystem::exists(mount_temp_dir)) { - std::filesystem::create_directory(mount_temp_dir); + if (std::filesystem::exists(mount_temp_dir)) { + // Temp folder should be cleared on each boot. + std::filesystem::remove_all(mount_temp_dir); } - mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir + std::filesystem::create_directory(mount_temp_dir); + mnt->Mount(mount_temp_dir, "/temp0"); mnt->Mount(mount_temp_dir, "/temp"); const auto& mount_download_dir = From eb09c4ccce4ca6ab8ed08f9e53926c52dd52d8a5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 1 May 2025 20:10:42 -0700 Subject: [PATCH 10/25] vk_presenter: Use correct format for output frame image and view. 
(#2871) --- .../renderer_vulkan/vk_presenter.cpp | 22 +++++++++++++++++-- src/video_core/renderer_vulkan/vk_presenter.h | 8 ++++--- src/video_core/texture_cache/image_info.cpp | 7 +++--- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 6bd4b26fa..09dd23cb6 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -270,7 +270,25 @@ Frame* Presenter::PrepareLastFrame() { return frame; } -Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) { +static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat format) { + switch (format) { + case Libraries::VideoOut::PixelFormat::A8B8G8R8Srgb: + return vk::Format::eR8G8B8A8Srgb; + case Libraries::VideoOut::PixelFormat::A8R8G8B8Srgb: + return vk::Format::eB8G8R8A8Srgb; + case Libraries::VideoOut::PixelFormat::A2R10G10B10: + case Libraries::VideoOut::PixelFormat::A2R10G10B10Srgb: + case Libraries::VideoOut::PixelFormat::A2R10G10B10Bt2020Pq: + return vk::Format::eA2R10G10B10UnormPack32; + default: + break; + } + UNREACHABLE_MSG("Unknown format={}", static_cast(format)); + return {}; +} + +Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, + const Libraries::VideoOut::PixelFormat format, bool is_eop) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); @@ -324,7 +342,7 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) cmdbuf); VideoCore::ImageViewInfo info{}; - info.format = image.info.pixel_format; + info.format = GetFrameViewFormat(format); // Exclude alpha from output frame to avoid blending with UI. 
info.mapping = vk::ComponentMapping{ .r = vk::ComponentSwizzle::eIdentity, diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index ad2708474..8ed2052ee 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -70,11 +70,12 @@ public: auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(desc); texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); - return PrepareFrameInternal(image_id, is_eop); + return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop); } Frame* PrepareBlankFrame(bool is_eop) { - return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, is_eop); + return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, + Libraries::VideoOut::PixelFormat::Unknown, is_eop); } VideoCore::Image& RegisterVideoOutSurface( @@ -119,7 +120,8 @@ public: } private: - Frame* PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop = true); + Frame* PrepareFrameInternal(VideoCore::ImageId image_id, + Libraries::VideoOut::PixelFormat format, bool is_eop = true); Frame* GetRenderFrame(); void SetExpectedGameSize(s32 width, s32 height); diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 26928eaf7..39322f449 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,14 +16,15 @@ using VideoOutFormat = Libraries::VideoOut::PixelFormat; static vk::Format ConvertPixelFormat(const VideoOutFormat format) { switch (format) { - case VideoOutFormat::A8R8G8B8Srgb: - return vk::Format::eB8G8R8A8Srgb; case VideoOutFormat::A8B8G8R8Srgb: + // Remaining formats are mapped to RGBA for internal consistency and changed to BGRA in the + // frame image view. 
+ case VideoOutFormat::A8R8G8B8Srgb: return vk::Format::eR8G8B8A8Srgb; case VideoOutFormat::A2R10G10B10: case VideoOutFormat::A2R10G10B10Srgb: case VideoOutFormat::A2R10G10B10Bt2020Pq: - return vk::Format::eA2R10G10B10UnormPack32; + return vk::Format::eA2B10G10R10UnormPack32; default: break; } From 0ba9ea6a3b98d6454164ac12a29baef30f1ef595 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 2 May 2025 13:22:05 -0500 Subject: [PATCH 11/25] Only perform early read-write open when truncating is needed (#2874) Should stop some fs error spam when games open files from /app0, as this open call would fail from reduced permissions. --- src/core/libraries/kernel/file_system.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index cb1fd14a2..ad372325c 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -181,10 +181,6 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { return -1; } } else { - // Start by opening as read-write so we can truncate regardless of flags. - // Since open starts by closing the file, this won't interfere with later open calls. - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); - file->type = Core::FileSys::FileType::Regular; if (truncate && read_only) { @@ -192,9 +188,14 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { h->DeleteHandle(handle); *__Error() = POSIX_EROFS; return -1; - } else if (truncate && e == 0) { - // If the file was opened successfully and truncate was enabled, reduce size to 0 - file->f.SetSize(0); + } else if (truncate) { + // Open the file as read-write so we can truncate regardless of flags. + // Since open starts by closing the file, this won't interfere with later open calls. 
+ e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); + if (e == 0) { + // If the file was opened successfully, reduce size to 0 + file->f.SetSize(0); + } } if (read) { From d542d952f4bbff97ef71ed61beac6da0813ac84a Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Sat, 3 May 2025 12:51:10 -0300 Subject: [PATCH 12/25] Savefixes VIII (#2851) * savedata dialog: fix SaveDialogUi move semantics fix possible dangling points * savedata dialog: removed unnecessary firmware version checks --- .../save_data/dialog/savedatadialog_ui.cpp | 23 ++++++++++--------- .../save_data/dialog/savedatadialog_ui.h | 3 ++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp index a6ca8744d..edb5caa07 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp @@ -49,13 +49,11 @@ void SaveDialogResult::CopyTo(OrbisSaveDataDialogResult& result) const { result.mode = this->mode; result.result = this->result; result.buttonId = this->button_id; - if (mode == SaveDataDialogMode::LIST || ElfInfo::Instance().FirmwareVer() >= ElfInfo::FW_45) { - if (result.dirName != nullptr) { - result.dirName->data.FromString(this->dir_name); - } - if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { - result.param->FromSFO(this->param); - } + if (result.dirName != nullptr) { + result.dirName->data.FromString(this->dir_name); + } + if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { + result.param->FromSFO(this->param); } result.userData = this->user_data; } @@ -345,12 +343,15 @@ SaveDialogUi::SaveDialogUi(SaveDialogUi&& other) noexcept } } -SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi other) { +SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi&& other) noexcept { std::scoped_lock lock(draw_mutex, 
other.draw_mutex); using std::swap; - swap(state, other.state); - swap(status, other.status); - swap(result, other.result); + state = other.state; + other.state = nullptr; + status = other.status; + other.status = nullptr; + result = other.result; + other.result = nullptr; if (status && *status == Status::RUNNING) { first_render = true; AddLayer(this); diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.h b/src/core/libraries/save_data/dialog/savedatadialog_ui.h index aa67e1f5f..dc97268f4 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.h +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.h @@ -300,7 +300,8 @@ public: ~SaveDialogUi() override; SaveDialogUi(const SaveDialogUi& other) = delete; SaveDialogUi(SaveDialogUi&& other) noexcept; - SaveDialogUi& operator=(SaveDialogUi other); + SaveDialogUi& operator=(SaveDialogUi& other) = delete; + SaveDialogUi& operator=(SaveDialogUi&& other) noexcept; void Finish(ButtonId buttonId, CommonDialog::Result r = CommonDialog::Result::OK); From 17b6343f18a39bbd6436a94956fb6cb90f8f554c Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 3 May 2025 13:47:03 -0700 Subject: [PATCH 13/25] emulator: Fix log initialization order. 
(#2878) --- src/emulator.cpp | 184 ++++++++++++++++++++++++----------------------- 1 file changed, 95 insertions(+), 89 deletions(-) diff --git a/src/emulator.cpp b/src/emulator.cpp index 448b8aad4..ebb34054b 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -10,7 +10,6 @@ #include "common/logging/log.h" #ifdef ENABLE_QT_GUI #include -#include "common/memory_patcher.h" #endif #include "common/assert.h" #ifdef ENABLE_DISCORD_RPC @@ -20,6 +19,7 @@ #include #endif #include "common/elf_info.h" +#include "common/memory_patcher.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" @@ -54,27 +54,6 @@ Emulator::Emulator() { WSADATA wsaData; WSAStartup(versionWanted, &wsaData); #endif - - // Create stdin/stdout/stderr - Common::Singleton::Instance()->CreateStdHandles(); - - // Defer until after logging is initialized. - memory = Core::Memory::Instance(); - controller = Common::Singleton::Instance(); - linker = Common::Singleton::Instance(); - - // Load renderdoc module. 
- VideoCore::LoadRenderDoc(); - - // Start the timer (Play Time) -#ifdef ENABLE_QT_GUI - start_time = std::chrono::steady_clock::now(); - const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - QString filePath = QString::fromStdString((user_dir / "play_time.txt").string()); - QFile file(filePath); - ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text), - "Error opening or creating play_time.txt"); -#endif } Emulator::~Emulator() { @@ -102,54 +81,89 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorMount(game_folder, "/hostapp", true); - auto& game_info = Common::ElfInfo::Instance(); + const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo"); + const auto param_sfo_exists = std::filesystem::exists(param_sfo_path); - // Loading param.sfo file if exists + // Load param.sfo details if it exists std::string id; std::string title; std::string app_version; u32 fw_version; Common::PSFAttributes psf_attributes{}; - - const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo"); - if (!std::filesystem::exists(param_sfo_path) || !Config::getSeparateLogFilesEnabled()) { - Common::Log::Initialize(); - Common::Log::Start(); - } - - if (std::filesystem::exists(param_sfo_path)) { + if (param_sfo_exists) { auto* param_sfo = Common::Singleton::Instance(); - const bool success = param_sfo->Open(param_sfo_path); - ASSERT_MSG(success, "Failed to open param.sfo"); + ASSERT_MSG(param_sfo->Open(param_sfo_path), "Failed to open param.sfo"); + const auto content_id = param_sfo->GetString("CONTENT_ID"); ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID"); + id = std::string(*content_id, 7, 9); - - if (Config::getSeparateLogFilesEnabled()) { - Common::Log::Initialize(id + ".log"); - Common::Log::Start(); + title = param_sfo->GetString("TITLE").value_or("Unknown title"); + fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); + app_version = 
param_sfo->GetString("APP_VER").value_or("Unknown version"); + if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) { + psf_attributes.raw = *raw_attributes; } - LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version); - LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); - LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); - LOG_INFO(Loader, "Description {}", Common::g_scm_desc); - LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url); + } - LOG_INFO(Config, "General LogType: {}", Config::getLogType()); - LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); - LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); - LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); - LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); - LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); - LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled()); - LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); - LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); - LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled()); - LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled()); - LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled()); - LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); + // Initialize logging as soon as possible + if (!id.empty() && Config::getSeparateLogFilesEnabled()) { + Common::Log::Initialize(id + ".log"); + } else { + Common::Log::Initialize(); + } + Common::Log::Start(); + LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version); + LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); + LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); + LOG_INFO(Loader, "Description {}", Common::g_scm_desc); + LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url); + + LOG_INFO(Config, "General LogType: {}", 
Config::getLogType()); + LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); + LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); + LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); + LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); + LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); + LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled()); + LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); + LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); + LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled()); + LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled()); + LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled()); + LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); + + if (param_sfo_exists) { + LOG_INFO(Loader, "Game id: {} Title: {}", id, title); + LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); + } + if (!args.empty()) { + const auto argc = std::min(args.size(), 32); + for (auto i = 0; i < argc; i++) { + LOG_INFO(Loader, "Game argument {}: {}", i, args[i]); + } + if (args.size() > 32) { + LOG_ERROR(Loader, "Too many game arguments, only passing the first 32"); + } + } + + // Create stdin/stdout/stderr + Common::Singleton::Instance()->CreateStdHandles(); + + // Initialize components + memory = Core::Memory::Instance(); + controller = Common::Singleton::Instance(); + linker = Common::Singleton::Instance(); + + // Load renderdoc module + VideoCore::LoadRenderDoc(); + + // Initialize patcher and trophies + if (!id.empty()) { + MemoryPatcher::g_game_serial = id; Libraries::NpTrophy::game_serial = id; + const auto trophyDir = Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles"; if (!std::filesystem::exists(trophyDir)) { @@ -158,41 +172,9 @@ void Emulator::Run(const 
std::filesystem::path& file, const std::vectorstart(60000); // 60000 ms = 1 minute -#endif - title = param_sfo->GetString("TITLE").value_or("Unknown title"); - LOG_INFO(Loader, "Game id: {} Title: {}", id, title); - fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); - app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); - LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); - if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) { - psf_attributes.raw = *raw_attributes; - } - if (!args.empty()) { - int argc = std::min(args.size(), 32); - for (int i = 0; i < argc; i++) { - LOG_INFO(Loader, "Game argument {}: {}", i, args[i]); - } - if (args.size() > 32) { - LOG_ERROR(Loader, "Too many game arguments, only passing the first 32"); - } - } - } - - const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png"); - if (std::filesystem::exists(pic1_path)) { - game_info.splash_path = pic1_path; } + auto& game_info = Common::ElfInfo::Instance(); game_info.initialized = true; game_info.game_serial = id; game_info.title = title; @@ -201,6 +183,11 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorGetHostPath("/app0/sce_sys/pic1.png"); + if (std::filesystem::exists(pic1_path)) { + game_info.splash_path = pic1_path; + } + std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); std::string window_title = ""; if (Common::g_is_release) { @@ -284,6 +271,25 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorstart(60000); // 60000 ms = 1 minute + + start_time = std::chrono::steady_clock::now(); + const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + QString filePath = QString::fromStdString((user_dir / "play_time.txt").string()); + QFile file(filePath); + ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text), + "Error opening or creating play_time.txt"); + } +#endif + linker->Execute(args); 
window->InitTimers(); From 9a22185ab780ce7362b7a587b7defd16b456c460 Mon Sep 17 00:00:00 2001 From: oltolm Date: Sun, 4 May 2025 12:11:02 +0200 Subject: [PATCH 14/25] vulkan: do not use VK_EXT_extended_dynamic_state (#2880) fixes Bloodborne crashing on RX 580 --- src/video_core/buffer_cache/buffer_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index cdf736a89..fb9fd755e 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -177,8 +177,8 @@ void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { if (instance.IsVertexInputDynamicState()) { cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); } else { - cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(), - host_sizes.data(), host_strides.data()); + cmdbuf.bindVertexBuffers2(0, num_buffers, host_buffers.data(), host_offsets.data(), + host_sizes.data(), host_strides.data()); } } From fed064931ad599f2de628cd9ad72c640da3f061b Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 5 May 2025 05:24:08 -0500 Subject: [PATCH 15/25] Core: Fix module load addresses (#2879) * Fix module map addresses Most modules are mapped starting at 0x800000000, with no gaps between mappings. * Hardcode hardware accurate base address Looking at our address space, all platforms will have this base address mapped, so there shouldn't be any problem in using it. * Clang * Swap module mapping to NoFlags, remove offset code Since real hardware has no gap between module mappings, the Fixed flag is just an annoyance to work around, and has no impact on the actual mappings. Swapping the module mappings to use flags NoFlags instead simplifies our code slightly. 
* Fix module mapping names On real hardware, the file extension is part of the mapping name. Easiest way to manage this is to swap the name to be `file.filename().string()` instead of `file.stem().string()` * Fix patches Completely missed this, whoops. --- src/core/address_space.h | 2 -- src/core/module.cpp | 13 +++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/core/address_space.h b/src/core/address_space.h index 7ccc2cd1e..d7f3efc75 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -19,8 +19,6 @@ enum class MemoryPermission : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) -constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; - constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; diff --git a/src/core/module.cpp b/src/core/module.cpp index cbe44457c..f31bbed6c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -19,8 +19,7 @@ namespace Core { using EntryFunc = PS4_SYSV_ABI int (*)(size_t args, const void* argp, void* param); -static u64 LoadOffset = CODE_BASE_OFFSET; -static constexpr u64 CODE_BASE_INCR = 0x010000000u; +static constexpr u64 ModuleLoadBase = 0x800000000; static u64 GetAlignedSize(const elf_program_header& phdr) { return (phdr.p_align != 0 ? 
(phdr.p_memsz + (phdr.p_align - 1)) & ~(phdr.p_align - 1) @@ -84,7 +83,7 @@ static std::string StringToNid(std::string_view symbol) { } Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index) - : memory{memory_}, file{file_}, name{file.stem().string()} { + : memory{memory_}, file{file_}, name{file.filename().string()} { elf.Open(file); if (elf.IsElfFile()) { LoadModuleToMemory(max_tls_index); @@ -113,10 +112,8 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { // Map module segments (and possible TLS trampolines) void** out_addr = reinterpret_cast(&base_virtual_addr); - memory->MapMemory(out_addr, memory->SystemReservedVirtualBase() + LoadOffset, - aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, - MemoryMapFlags::Fixed, VMAType::Code, name, true); - LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); + memory->MapMemory(out_addr, ModuleLoadBase, aligned_base_size + TrampolineSize, + MemoryProt::CpuReadWrite, MemoryMapFlags::NoFlags, VMAType::Code, name, true); LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr)); #ifdef ARCH_X86_64 @@ -229,7 +226,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { LOG_INFO(Core_Linker, "program entry addr ..........: {:#018x}", entry_addr); if (MemoryPatcher::g_eboot_address == 0) { - if (name == "eboot") { + if (name == "eboot.bin") { MemoryPatcher::g_eboot_address = base_virtual_addr; MemoryPatcher::g_eboot_image_size = base_size; MemoryPatcher::OnGameLoaded(); From c7fb3ebd93a40a406e4dc6fdbfc03c00c58bec4a Mon Sep 17 00:00:00 2001 From: MajorP93 Date: Wed, 7 May 2025 02:11:32 +0200 Subject: [PATCH 16/25] shader_recompiler: Widen num_conversion bitfield (#2886) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do this in order to be able to actually fit in all possible values from AmdGpu::NumberConversion. 
Fixes gcc compiler warnings: warning: ‘Shader::PsColorBuffer::num_conversion’ is too small to hold all values of ‘enum class AmdGpu::NumberConversion’ --- src/shader_recompiler/runtime_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 517392b98..b8ed42f5b 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -169,10 +169,10 @@ static constexpr u32 MaxColorBuffers = 8; struct PsColorBuffer { AmdGpu::NumberFormat num_format : 4; - AmdGpu::NumberConversion num_conversion : 2; + AmdGpu::NumberConversion num_conversion : 3; AmdGpu::Liverpool::ShaderExportFormat export_format : 4; u32 needs_unorm_fixup : 1; - u32 pad : 21; + u32 pad : 20; AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; From 1aa7eb8a422ca90a1b7cfcc45f30331139f7cccf Mon Sep 17 00:00:00 2001 From: Fire Cube Date: Wed, 7 May 2025 23:50:16 +0200 Subject: [PATCH 17/25] add scePthreadSetaffinity and emulate affinity (#2885) * add implementation * fix preprocessor * fixes squidbus's comments * fix clang * comment became fucked up? 
* fix removed return --- src/core/libraries/kernel/threads/pthread.cpp | 70 +++++++++++++++++++ src/core/libraries/kernel/threads/pthread.h | 2 + 2 files changed, 72 insertions(+) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index c4127ecf2..e791e74bf 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -289,7 +289,12 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt /* Create thread */ new_thread->native_thr = Core::NativeThread(); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); + ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); + + if (attr != nullptr && *attr != nullptr && (*attr)->cpuset != nullptr) { + new_thread->SetAffinity((*attr)->cpuset); + } if (ret) { *thread = nullptr; } @@ -521,6 +526,69 @@ int PS4_SYSV_ABI posix_pthread_setcancelstate(PthreadCancelState state, return 0; } +int Pthread::SetAffinity(const Cpuset* cpuset) { + const auto processor_count = std::thread::hardware_concurrency(); + if (processor_count < 8) { + return 0; + } + if (cpuset == nullptr) { + return POSIX_EINVAL; + } + + u64 mask = cpuset->bits; + + uintptr_t handle = native_thr.GetHandle(); + if (handle == 0) { + return POSIX_ESRCH; + } + + // We don't use this currently because some games gets performance problems + // when applying affinity even on strong hardware + /* + #ifdef _WIN64 + DWORD_PTR affinity_mask = static_cast(mask); + if (!SetThreadAffinityMask(reinterpret_cast(handle), affinity_mask)) { + return POSIX_EINVAL; + } + + #elif defined(__linux__) + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + + u64 mask = cpuset->bits; + for (int cpu = 0; cpu < std::min(64, CPU_SETSIZE); ++cpu) { + if (mask & (1ULL << cpu)) { + CPU_SET(cpu, &cpu_set); + } + } + + int result = + pthread_setaffinity_np(static_cast(handle), sizeof(cpu_set_t), &cpu_set); + if (result != 0) { + return 
POSIX_EINVAL; + } + #endif + */ + return 0; +} + +int PS4_SYSV_ABI posix_pthread_setaffinity_np(PthreadT thread, size_t cpusetsize, + const Cpuset* cpusetp) { + if (thread == nullptr || cpusetp == nullptr) { + return POSIX_EINVAL; + } + thread->attr.cpusetsize = cpusetsize; + return thread->SetAffinity(cpusetp); +} + +int PS4_SYSV_ABI scePthreadSetaffinity(PthreadT thread, const Cpuset mask) { + int result = posix_pthread_setaffinity_np(thread, 0x10, &mask); + if (result != 0) { + return ErrnoToSceKernelError(result); + } + return 0; +} + void RegisterThread(Core::Loader::SymbolsResolver* sym) { // Posix LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); @@ -544,6 +612,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); + LIB_FUNCTION("5KWrg7-ZqvE", "libkernel", 1, "libkernel", 1, 1, posix_pthread_setaffinity_np); // Orbis LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_once)); @@ -566,6 +635,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_setprio)); LIB_FUNCTION("rNhWz+lvOMU", "libkernel", 1, "libkernel", 1, 1, _sceKernelSetThreadDtors); LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield); + LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity) } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index 089156776..09eed11b8 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -332,6 +332,8 @@ struct Pthread { return true; } } + + int 
SetAffinity(const Cpuset* cpuset); }; using PthreadT = Pthread*; From 3b7c36e1ba435e96e16c81d11b5c8a526513ff21 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Wed, 7 May 2025 19:20:55 -0300 Subject: [PATCH 18/25] Clear stack before executing guest code (#2877) * Clear stack before executing guest code * clang, don't optimize me :rotating_light: avoid ClearStack function being optimized in release builds --- src/core/tls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core/tls.h b/src/core/tls.h index 6edd6a297..46ca8153b 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -5,6 +5,8 @@ #include "common/types.h" +void* memset(void* ptr, int value, size_t num); + namespace Xbyak { class CodeGenerator; } @@ -41,9 +43,18 @@ Tcb* GetTcbBase(); /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); +template +__attribute__((optnone)) void ClearStack() { + volatile void* buf = alloca(size); + memset(const_cast(buf), 0, size); + buf = nullptr; +} + template ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... 
args) { EnsureThreadInitialized(); + // clear stack to avoid trash from EnsureThreadInitialized + ClearStack<13_KB>(); return func(std::forward(args)...); } From 58df609ba00e09435c79d6a6649bce6176f06f78 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Thu, 8 May 2025 19:59:12 +0300 Subject: [PATCH 19/25] Optimize games that hit unpatchable EXTRQ/INSERTQ (#2888) * Make signal handler faster * I love clang-format * Use faster decoding * MacOS CI --- src/core/cpu_patches.cpp | 259 ++++++++++++++++++++------------------- 1 file changed, 136 insertions(+), 123 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index c8106b270..8937ef04b 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -464,9 +464,8 @@ static std::pair TryPatch(u8* code, PatchModule* module) { if (needs_trampoline && instruction.length < 5) { // Trampoline is needed but instruction is too short to patch. - // Return false and length to fall back to the illegal instruction handler, - // or to signal to AOT compilation that this instruction should be skipped and - // handled at runtime. + // Return false and length to signal to AOT compilation that this instruction + // should be skipped and handled at runtime. 
return std::make_pair(false, instruction.length); } @@ -512,136 +511,137 @@ static std::pair TryPatch(u8* code, PatchModule* module) { #if defined(ARCH_X86_64) +static bool Is4ByteExtrqOrInsertq(void* code_address) { + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // extrq + } else if (bytes[0] == 0xF2 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // insertq + } else { + return false; + } +} + static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { - ZydisDecodedInstruction instruction; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - const auto status = - Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + // We need to decode the instruction to find out what it is. Normally we'd use a fully fleshed + // out decoder like Zydis, however Zydis does a bunch of stuff that impact performance that we + // don't care about. We can get information about the instruction a lot faster by writing a mini + // decoder here, since we know it is definitely an extrq or an insertq. If for some reason we + // need to interpret more instructions in the future (I don't see why we would), we can revert + // to using Zydis. + ZydisMnemonic mnemonic; + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66) { + mnemonic = ZYDIS_MNEMONIC_EXTRQ; + } else if (bytes[0] == 0xF2) { + mnemonic = ZYDIS_MNEMONIC_INSERTQ; + } else { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", + fmt::ptr(code_address), + ZYAN_SUCCESS(status) ? 
ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + return false; + } - switch (instruction.mnemonic) { + ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79); + + // Note: It's guaranteed that there's no REX prefix in these instructions checked by + // Is4ByteExtrqOrInsertq + u8 modrm = bytes[3]; + u8 rm = modrm & 0b111; + u8 reg = (modrm >> 3) & 0b111; + u8 mod = (modrm >> 6) & 0b11; + + ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only + + int dstIndex = reg; + int srcIndex = rm; + + switch (mnemonic) { case ZYDIS_MNEMONIC_EXTRQ: { - bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = lowQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[0].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[0].reg.value <= ZYDIS_REGISTER_XMM15 && - operands[1].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[1].reg.value <= ZYDIS_REGISTER_XMM15, - "Unexpected operand types for EXTRQ instruction"); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - - u64 lowQWordDst; - 
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = lowQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (lowQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "extrq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordDst >>= index; - lowQWordDst &= mask; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (lowQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "extrq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordDst >>= index; + lowQWordDst &= mask; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } case ZYDIS_MNEMONIC_INSERTQ: { - bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, - "INSERTQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc, highQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = highQWordSrc & 0x3F; + u64 mask; + if 
(length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED && - operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED, - "operands 2 and 3 must be unused for register form."); - - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER, - "operands 0 and 1 must be registers."); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc, highQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = highQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (highQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "insertq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordSrc &= mask; - lowQWordDst &= ~(mask << index); - lowQWordDst |= lowQWordSrc << index; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (highQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. 
+ LOG_TRACE(Core, + "insertq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordSrc &= mask; + lowQWordDst &= ~(mask << index); + lowQWordDst |= lowQWordSrc << index; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } default: { - LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", - fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic)); - return false; + UNREACHABLE(); } } @@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address static bool PatchesIllegalInstructionHandler(void* context) { void* code_address = Common::GetRip(context); - if (!TryPatchJit(code_address)) { + if (Is4ByteExtrqOrInsertq(code_address)) { + // The instruction is not big enough for a relative jump, don't try to patch it and pass it + // to our illegal instruction interpreter directly return TryExecuteIllegalInstruction(context, code_address); + } else { + if (!TryPatchJit(code_address)) { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address, + ZYAN_SUCCESS(status) ? 
ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + } } + return true; } From 46b88bd10f0d6d8dc59a80866a625a75e739a0af Mon Sep 17 00:00:00 2001 From: mailwl Date: Fri, 9 May 2025 11:08:22 +0300 Subject: [PATCH 20/25] [Libs] Stubs sceSigninDialog (#2890) * [Libs] Stubs SigninDialog * clang-format * clang-format again * remove magic constant * log dialog finished status --- CMakeLists.txt | 2 + src/common/logging/filter.cpp | 1 + src/common/logging/types.h | 1 + src/core/libraries/libs.cpp | 2 + .../libraries/signin_dialog/signindialog.cpp | 64 +++++++++++++++++++ .../libraries/signin_dialog/signindialog.h | 29 +++++++++ 6 files changed, 99 insertions(+) create mode 100644 src/core/libraries/signin_dialog/signindialog.cpp create mode 100644 src/core/libraries/signin_dialog/signindialog.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f55767611..9b10d0e5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -597,6 +597,8 @@ set(MISC_LIBS src/core/libraries/screenshot/screenshot.cpp src/core/libraries/move/move.h src/core/libraries/ulobjmgr/ulobjmgr.cpp src/core/libraries/ulobjmgr/ulobjmgr.h + src/core/libraries/signin_dialog/signindialog.cpp + src/core/libraries/signin_dialog/signindialog.h ) set(DEV_TOOLS src/core/devtools/layer.cpp diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 867d62916..622af93cc 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -137,6 +137,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, NpParty) \ SUB(Lib, Zlib) \ SUB(Lib, Hmd) \ + SUB(Lib, SigninDialog) \ CLS(Frontend) \ CLS(Render) \ SUB(Render, Vulkan) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index e5714a81a..27a87e082 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -104,6 +104,7 @@ enum class Class : u8 { Lib_NpParty, ///< The LibSceNpParty implementation Lib_Zlib, ///< The LibSceZlib implementation. 
Lib_Hmd, ///< The LibSceHmd implementation. + Lib_SigninDialog, ///< The LibSigninDialog implementation. Frontend, ///< Emulator UI Render, ///< Video Core Render_Vulkan, ///< Vulkan backend diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index 3f5baf640..3826ff793 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -45,6 +45,7 @@ #include "core/libraries/save_data/savedata.h" #include "core/libraries/screenshot/screenshot.h" #include "core/libraries/share_play/shareplay.h" +#include "core/libraries/signin_dialog/signindialog.h" #include "core/libraries/system/commondialog.h" #include "core/libraries/system/msgdialog.h" #include "core/libraries/system/posix.h" @@ -120,6 +121,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::Hmd::RegisterlibSceHmd(sym); Libraries::DiscMap::RegisterlibSceDiscMap(sym); Libraries::Ulobjmgr::RegisterlibSceUlobjmgr(sym); + Libraries::SigninDialog::RegisterlibSceSigninDialog(sym); } } // namespace Libraries diff --git a/src/core/libraries/signin_dialog/signindialog.cpp b/src/core/libraries/signin_dialog/signindialog.cpp new file mode 100644 index 000000000..0e4eb63a2 --- /dev/null +++ b/src/core/libraries/signin_dialog/signindialog.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// Generated By moduleGenerator +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" +#include "signindialog.h" + +namespace Libraries::SigninDialog { + +s32 PS4_SYSV_ABI sceSigninDialogInitialize() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogOpen() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +Status PS4_SYSV_ABI sceSigninDialogGetStatus() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status"); + return Status::FINISHED; +} + +Status 
PS4_SYSV_ABI sceSigninDialogUpdateStatus() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status"); + return Status::FINISHED; +} + +s32 PS4_SYSV_ABI sceSigninDialogGetResult() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogClose() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogTerminate() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("mlYGfmqE3fQ", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogInitialize); + LIB_FUNCTION("JlpJVoRWv7U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogOpen); + LIB_FUNCTION("2m077aeC+PA", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogGetStatus); + LIB_FUNCTION("Bw31liTFT3A", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogUpdateStatus); + LIB_FUNCTION("nqG7rqnYw1U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogGetResult); + LIB_FUNCTION("M3OkENHcyiU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogClose); + LIB_FUNCTION("LXlmS6PvJdU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogTerminate); +}; + +} // namespace Libraries::SigninDialog diff --git a/src/core/libraries/signin_dialog/signindialog.h b/src/core/libraries/signin_dialog/signindialog.h new file mode 100644 index 000000000..8726ad1f6 --- /dev/null +++ b/src/core/libraries/signin_dialog/signindialog.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +enum class Status : u32 { + NONE = 0, + INITIALIZED = 1, + RUNNING = 2, + FINISHED = 3, +}; + +namespace Libraries::SigninDialog 
{ + +s32 PS4_SYSV_ABI sceSigninDialogInitialize(); +s32 PS4_SYSV_ABI sceSigninDialogOpen(); +Status PS4_SYSV_ABI sceSigninDialogGetStatus(); +Status PS4_SYSV_ABI sceSigninDialogUpdateStatus(); +s32 PS4_SYSV_ABI sceSigninDialogGetResult(); +s32 PS4_SYSV_ABI sceSigninDialogClose(); +s32 PS4_SYSV_ABI sceSigninDialogTerminate(); + +void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym); +} // namespace Libraries::SigninDialog From 8e7c5a4d995106661524173914af15aeeb11511a Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Fri, 9 May 2025 17:33:32 +0200 Subject: [PATCH 21/25] Remove deprecated include (#2893) --- src/core/libraries/libc_internal/printf.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/libraries/libc_internal/printf.h b/src/core/libraries/libc_internal/printf.h index fe63481a0..9c22e922c 100644 --- a/src/core/libraries/libc_internal/printf.h +++ b/src/core/libraries/libc_internal/printf.h @@ -56,7 +56,6 @@ #include #include -#include #include #include #include From b130fe6ed59277ff66ff8579ce3aa14452f2416c Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 9 May 2025 08:43:20 -0700 Subject: [PATCH 22/25] vulkan: Handle incompatible depth format using null binding. 
(#2892) Co-authored-by: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> --- src/shader_recompiler/info.h | 5 +++ .../ir/passes/resource_tracking_pass.cpp | 6 +++ src/video_core/amdgpu/resource.h | 13 +++++++ .../renderer_vulkan/vk_rasterizer.cpp | 5 ++- src/video_core/texture_cache/image_view.h | 2 - .../texture_cache/texture_cache.cpp | 37 ++++++++++++------- src/video_core/texture_cache/texture_cache.h | 4 ++ 7 files changed, 55 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 784f8b4d2..12e48c8e4 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -281,6 +281,11 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept // Fall back to null image if unbound. return AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + return AmdGpu::Image::NullDepth(); + } return image; } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 1de255e4d..cc0bf83d3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -363,6 +363,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); image = AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (inst_info.is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + LOG_ERROR(Render_Vulkan, "Shader compiled using non-depth image with depth instruction!"); + image = AmdGpu::Image::NullDepth(); + } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; diff --git 
a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index c387c7bf2..9060074fb 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -219,6 +219,19 @@ struct Image { return image; } + static constexpr Image NullDepth() { + Image image{}; + image.data_format = u64(DataFormat::Format32); + image.num_format = u64(NumberFormat::Float); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); + image.tiling_index = u64(TilingMode::Texture_MicroTiled); + image.type = u64(ImageType::Color2D); + return image; + } + bool Valid() const { return (type & 0x8u) != 0; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4caa781b9..e7b42a34b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -618,8 +618,9 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + auto& null_image_view = + texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, vk::ImageLayout::eGeneral); } } else { diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 23c703d23..6a17490bf 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -34,8 +34,6 @@ struct ImageViewInfo { struct Image; -constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0}; - struct ImageView { ImageView(const Vulkan::Instance& instance, const 
ImageViewInfo& info, Image& image, ImageId image_id); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 047bb3dfe..82f4d6413 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -8,6 +8,7 @@ #include "common/debug.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/host_compatibility.h" @@ -23,31 +24,41 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, tile_manager{instance, scheduler} { + // Create basic null image at fixed image ID. + const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); + ASSERT(null_id.index == NULL_IMAGE_ID.index); +} + +TextureCache::~TextureCache() = default; + +ImageId TextureCache::GetNullImage(const vk::Format format) { + const auto existing_image = null_images.find(format); + if (existing_image != null_images.end()) { + return existing_image->second; + } + ImageInfo info{}; - info.pixel_format = vk::Format::eR8G8B8A8Unorm; + info.pixel_format = format; info.type = vk::ImageType::e2D; - info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled); + info.tiling_idx = static_cast(AmdGpu::TilingMode::Texture_MicroTiled); info.num_bits = 32; info.UpdateSize(); + const ImageId null_id = slot_images.insert(instance, scheduler, info); - ASSERT(null_id.index == NULL_IMAGE_ID.index); auto& img = slot_images[null_id]; + const vk::Image& null_image = img.image; - Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image"); + Vulkan::SetObjectName(instance.GetDevice(), null_image, + 
fmt::format("Null Image ({})", vk::to_string(format))); + img.flags = ImageFlagBits::Empty; img.track_addr = img.info.guest_address; img.track_addr_end = img.info.guest_address + img.info.guest_size; - ImageViewInfo view_info; - const auto null_view_id = - slot_image_views.insert(instance, view_info, slot_images[null_id], null_id); - ASSERT(null_view_id.index == NULL_IMAGE_VIEW_ID.index); - const vk::ImageView& null_image_view = slot_image_views[null_view_id].image_view.get(); - Vulkan::SetObjectName(instance.GetDevice(), null_image_view, "Null Image View"); + null_images.emplace(format, null_id); + return null_id; } -TextureCache::~TextureCache() = default; - void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { if (image.hash == 0) { // Initialize hash @@ -296,7 +307,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { const auto& info = desc.info; if (info.guest_address == 0) [[unlikely]] { - return NULL_IMAGE_ID; + return GetNullImage(info.pixel_format); } std::scoped_lock lock{mutex}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f262768ea..b6bf88958 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -246,6 +246,9 @@ private: } } + /// Gets or creates a null image for a particular format. + ImageId GetNullImage(vk::Format format); + /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); @@ -285,6 +288,7 @@ private: Common::SlotVector slot_images; Common::SlotVector slot_image_views; tsl::robin_map samplers; + tsl::robin_map null_images; PageTable page_table; std::mutex mutex; From 8d7cbf9943f1b8476bee7bde758b77d0d4d4edff Mon Sep 17 00:00:00 2001 From: Missake212 Date: Fri, 9 May 2025 17:01:34 +0100 Subject: [PATCH 23/25] Adding opcode IMAGE_SAMPLE_B_O (#2894) * Adding opcode IMAGE_SAMPLE_B_O: * fix clang (my first time !) 
--- src/shader_recompiler/frontend/translate/vector_memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index cfc01c58f..5639bc56a 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -143,6 +143,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_O: case Opcode::IMAGE_SAMPLE_L_O: + case Opcode::IMAGE_SAMPLE_B_O: case Opcode::IMAGE_SAMPLE_LZ_O: case Opcode::IMAGE_SAMPLE_C_O: case Opcode::IMAGE_SAMPLE_C_LZ_O: From a1439b15cf572a862dfd01dea1dbe71c66b473d7 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 9 May 2025 10:04:37 -0700 Subject: [PATCH 24/25] gnm: Implement sceGnmDrawIndexIndirectMulti (#2889) --- src/core/libraries/gnmdriver/gnmdriver.cpp | 38 +++++++++++++++---- src/core/libraries/gnmdriver/gnmdriver.h | 4 +- src/video_core/amdgpu/liverpool.cpp | 37 ++++++++++++++---- src/video_core/amdgpu/pm4_cmds.h | 26 +++++++++++-- .../renderer_vulkan/vk_instance.cpp | 1 + 5 files changed, 86 insertions(+), 20 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 25ac4921c..f2f40e0e3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -505,9 +505,10 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 flags) { LOG_TRACE(Lib_GnmDriver, "called"); - if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) && - (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) && - (instance_sgpr_offset < 0x10u)) { + if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) && + (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + 
(shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { cmdbuf = WriteHeader(cmdbuf, 2); cmdbuf = WriteBody(cmdbuf, 0u); @@ -535,10 +536,33 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da return -1; } -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - UNREACHABLE(); - return ORBIS_OK; +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 11) && (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { + + const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader( + cmdbuf, 6, PM4ShaderType::ShaderGraphics, predicate); + + const auto sgpr_offset = indirect_sgpr_offsets[shader_stage]; + + cmdbuf[0] = data_offset; + cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[3] = max_count; + cmdbuf[4] = sizeof(DrawIndexedIndirectArgs); + cmdbuf[5] = sceKernelIsNeoMode() ? 
flags & 0xe0000000u : 0; + + cmdbuf += 6; + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 94d06c85f..a3d4968d3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -51,7 +51,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 max_count, u64 count_addr, u32 shader_stage, u32 vertex_sgpr_offset, u32 instance_sgpr_offset, u32 flags); -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, u32 flags); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 967b952c6..4c8e3367a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -455,14 +455,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_indirect->data_offset; - const auto size = sizeof(DrawIndirectArgs); + const auto stride = sizeof(DrawIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndirect", cmd_address)); - rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(false, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; @@ -471,7 +471,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset 
= draw_index_indirect->data_offset; - const auto size = sizeof(DrawIndexedIndirectArgs); + const auto stride = sizeof(DrawIndexedIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -479,25 +479,46 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirect", cmd_address)); - rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; } - case PM4ItOpcode::DrawIndexIndirectCountMulti: { + case PM4ItOpcode::DrawIndexIndirectMulti: { const auto* draw_index_indirect = reinterpret_cast(header); const auto offset = draw_index_indirect->data_offset; if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin( + fmt::format("gfx:{}:DrawIndexIndirectMulti", cmd_address)); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, 0); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::DrawIndexIndirectCountMulti: { + const auto* draw_index_indirect = + reinterpret_cast(header); + const auto offset = draw_index_indirect->data_offset; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirectCountMulti", cmd_address)); - rasterizer->DrawIndirect( - true, indirect_args_addr, offset, draw_index_indirect->stride, - draw_index_indirect->count, draw_index_indirect->countAddr); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + 
draw_index_indirect->count, + draw_index_indirect->count_indirect_enable.Value() + ? draw_index_indirect->count_addr + : 0); rasterizer->ScopeMarkerEnd(); } break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index ae1d32e00..6b55f5b65 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -860,6 +860,24 @@ struct PM4CmdDrawIndexIndirect { }; struct PM4CmdDrawIndexIndirectMulti { + PM4Type3Header header; ///< header + u32 data_offset; ///< Byte aligned offset where the required data structure starts + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the + ///< BaseVertexLocation it fetched from memory + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the + ///< StartInstanceLocation it fetched from memory + }; + u32 count; ///< Count of data structures to loop through before going to next packet + u32 stride; ///< Stride in memory from one data structure to the next + u32 draw_initiator; ///< Draw Initiator Register +}; + +struct PM4CmdDrawIndexIndirectCountMulti { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -874,14 +892,14 @@ struct PM4CmdDrawIndexIndirectMulti { }; union { u32 dw4; - BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count + BitField<0, 16, u32> draw_index_loc; ///< register offset to write the Draw Index count BitField<30, 1, u32> - countIndirectEnable; ///< Indicates the data structure count is in memory + count_indirect_enable; ///< Indicates the data structure count is in memory BitField<31, 1, u32> - drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC + draw_index_enable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC }; u32 count; ///< Count of data structures to loop through before going to next packet - u64 countAddr; ///< DWord 
aligned Address[31:2]; Valid if countIndirectEnable is set + u64 count_addr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set u32 stride; ///< Stride in memory from one data structure to the next u32 draw_initiator; ///< Draw Initiator Register }; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 99f225d79..1004d850f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -338,6 +338,7 @@ bool Instance::CreateDevice() { .geometryShader = features.geometryShader, .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, + .multiDrawIndirect = features.multiDrawIndirect, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, .depthBounds = features.depthBounds, From 6477dc4f1e699981919022ac69fef59813a9ad94 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 9 May 2025 14:33:04 -0500 Subject: [PATCH 25/25] Core: Memory Fixes (#2872) * Fix VirtualQuery behavior on low addresses. * Fix VirtualQuery struct Somewhere in our BitField and array use, the size of our VirtualQuery struct became larger than the struct used on real hardware. Fixing this fixes some data corruption visible in the name parameter during my tests. * Default name to anon On real hardware, nameless mappings are given the name "anon:address" where address appears to be the address that made the memory call. For simplicity sake, I'll stick to the name "anon" for now. * Place an upper bound on returns from SearchFree Right now, this upper bound is set based on the limitations of our GPU buffer cache and page table. Someone with more experience in that area of code should probably fix that at some point. 
* More anons * Clang * Fix name in sceKernelMapNamedDirectMemory * strncpy instead of strcpy Hardcoded the constant size for now, I need to review how real hardware behaves here to determine if anything else is necessary for this to be accurate. * Fix name behavior All memory naming functions restrict the name size to a 31 character limit, and return `ORBIS_KERNEL_ERROR_ENAMETOOLONG` if that limit is exceeded. Since this value is constant for all functions involving names, I've defined it as a constant in kernel's memory.h, and used that in place of any hardcoded 32 character limits. * Error logging Hopefully this helps in catching the UFC regression? * Increase address space upper bound Probably needs heavy testing, especially on Mac/Windows. This increases the address space, as needed to accommodate strange memory behaviors seen in UFC. * VirtualQuery fix Due to limitations of certain platforms, we initialize our vma_map with 3 separate free mappings. As such, we need to use a while loop here to accurately query mappings with high addresses. * Fix mappings to high addresses The PS4's GPU can only handle 40-bit addresses. Our texture cache and buffer cache were designed around these limits, and mapping to higher addresses would cause segmentation faults and access violations. To fix these crashes, only map to the GPU if the mapping is fully contained within the address space the GPU should access. I'm open to suggestions on how to make this cleaner. * Revert "Increase address space upper bound" This reverts commit 3d50eeeebb6aa40e38d6f87e6480235c917843f3. * Revert VirtualQuery while loop Windows wasn't happy with this, again. Will try to debug and properly fix this when I have a good chance. * Fix asserts FindVMA, due to the way it's programmed, never actually returns vma_map.end(), the furthest it ever returns is the last valid memory area. All those asserts involving vma_map.end() never actually trigger due to this.
This commit removes redundant asserts, adds messages to asserts that were lacking them, and fixes all asserts designed to detect out of bounds memory accesses so they actually trigger. I've also fixed some potential memory safety issues. * Proper error behavior in QueryProtection Might as well handle this properly while I'm here. * Clang * More information about ReserveVirtualRange results Should help debug issues like the one in The Order: 1886 (CUSA00076) * Fix assert message * Update assert message Extra space * Fix my bug Oh hey, finally something that's my fault. * Fix rasterizer unmaps Should use adjusted_size here, otherwise we could unmap too much. Thanks to diegolix29 for spotting this. * Fix edge case in MapMemory Code comments explain everything. This should fix some memory asserts. * Fix fix Avoid running the code path if it's unnecessary, since there are many additional edge cases to handle when the VMA map is small. * Fix fix fix Should prevent infinite loops, haven't tested properly yet though. * Split logging for inputs and out_addr in ReserveVirtualRange Addresses review comments. 
--- src/core/libraries/kernel/memory.cpp | 54 ++++---- src/core/libraries/kernel/memory.h | 16 +-- src/core/memory.cpp | 177 ++++++++++++++++++++------- src/core/memory.h | 8 +- 4 files changed, 179 insertions(+), 76 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 8a0c91479..495ddc52f 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -126,9 +126,6 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, size_t infoSize) { LOG_INFO(Kernel_Vmm, "called addr = {}, flags = {:#x}", fmt::ptr(addr), flags); - if (!addr) { - return ORBIS_KERNEL_ERROR_EACCES; - } auto* memory = Core::Memory::Instance(); return memory->VirtualQuery(std::bit_cast(addr), flags, info); } @@ -136,7 +133,6 @@ s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtual s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment) { LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, flags = {:#x}, alignment = {:#x}", fmt::ptr(*addr), len, flags, alignment); - if (addr == nullptr) { LOG_ERROR(Kernel_Vmm, "Address is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -155,9 +151,12 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u auto* memory = Core::Memory::Instance(); const VAddr in_addr = reinterpret_cast(*addr); const auto map_flags = static_cast(flags); - memory->Reserve(addr, in_addr, len, map_flags, alignment); - return ORBIS_OK; + s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment); + if (result == 0) { + LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); + } + return result; } int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, int flags, @@ -172,10 +171,12 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int 
prot, i LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (!Common::Is16KBAligned(directMemoryStart)) { LOG_ERROR(Kernel_Vmm, "Start address is not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (alignment != 0) { if ((!std::has_single_bit(alignment) && !Common::Is16KBAligned(alignment))) { LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!"); @@ -183,14 +184,19 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i } } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); auto* memory = Core::Memory::Instance(); const auto ret = - memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "", false, - directMemoryStart, alignment); + memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, name, + false, directMemoryStart, alignment); LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); return ret; @@ -199,7 +205,8 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i int PS4_SYSV_ABI sceKernelMapDirectMemory(void** addr, u64 len, int prot, int flags, s64 directMemoryStart, u64 alignment) { LOG_INFO(Kernel_Vmm, "called, redirected to sceKernelMapNamedDirectMemory"); - return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, ""); + return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, + "anon"); } s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot, @@ -210,17 +217,16 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t return ORBIS_KERNEL_ERROR_EINVAL; } - static constexpr size_t MaxNameSize = 32; - if 
(std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr_in_out); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); @@ -236,7 +242,7 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, int flags) { - return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, ""); + return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon"); } int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { @@ -304,7 +310,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT: { result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length, entries[i].protection, flags, - static_cast(entries[i].offset), 0, ""); + static_cast(entries[i].offset), 0, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, " "result = {}", @@ -326,7 +332,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE: { result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length, - entries[i].protection, flags, ""); + entries[i].protection, flags, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, type = {}, " "result = {}", @@ -356,16 +362,16 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } s32 PS4_SYSV_ABI 
sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) { - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + auto* memory = Core::Memory::Instance(); memory->NameVirtualRange(std::bit_cast(addr), len, name); return ORBIS_OK; diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 400b6c3fc..6acb559d1 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -47,6 +47,8 @@ enum MemoryOpTypes : u32 { ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4 }; +constexpr u32 ORBIS_KERNEL_MAXIMUM_NAME_LENGTH = 32; + struct OrbisQueryInfo { uintptr_t start; uintptr_t end; @@ -59,14 +61,12 @@ struct OrbisVirtualQueryInfo { size_t offset; s32 protection; s32 memory_type; - union { - BitField<0, 1, u32> is_flexible; - BitField<1, 1, u32> is_direct; - BitField<2, 1, u32> is_stack; - BitField<3, 1, u32> is_pooled; - BitField<4, 1, u32> is_committed; - }; - std::array name; + u32 is_flexible : 1; + u32 is_direct : 1; + u32 is_stack : 1; + u32 is_pooled : 1; + u32 is_committed : 1; + char name[ORBIS_KERNEL_MAXIMUM_NAME_LENGTH]; }; struct OrbisKernelBatchMapEntry { diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 494ffa70c..9861e813a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -75,7 +75,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { // Clamp size to the remaining size of the current VMA. 
auto vma = FindVMA(virtual_addr); - ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr); + ASSERT_MSG(vma->second.Contains(virtual_addr, 0), + "Attempted to access invalid GPU address {:#x}", virtual_addr); u64 clamped_size = vma->second.base + vma->second.size - virtual_addr; ++vma; @@ -96,6 +97,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { const VAddr virtual_addr = std::bit_cast(address); const auto& vma = FindVMA(virtual_addr)->second; + ASSERT_MSG(vma.Contains(virtual_addr, 0), + "Attempting to access out of bounds memory at address {:#x}", virtual_addr); if (vma.type != VMAType::Direct) { return false; } @@ -145,10 +148,12 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, auto mapping_end = mapping_start + size; // Find the first free, large enough dmem area in the range. - while ((!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) && - dmem_area != dmem_map.end()) { + while (!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) { // The current dmem_area isn't suitable, move to the next one. dmem_area++; + if (dmem_area == dmem_map.end()) { + break; + } // Update local variables based on the new dmem_area mapping_start = Common::AlignUp(dmem_area->second.base, alignment); @@ -172,7 +177,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; auto dmem_area = CarveDmemArea(phys_addr, size); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. 
std::vector> remove_list; @@ -216,12 +220,18 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -229,7 +239,7 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::PoolReserved; MergeAdjacent(vma_map, new_vma_handle); @@ -247,19 +257,25 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - auto& vma = FindVMA(mapped_addr)->second; + auto vma = FindVMA(mapped_addr)->second; // If the VMA is mapped, unmap the region first. 
if (vma.IsMapped()) { UnmapMemoryImpl(mapped_addr, size); vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -267,7 +283,7 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::Reserved; MergeAdjacent(vma_map, new_vma_handle); @@ -288,7 +304,9 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); // Perform the mapping. 
void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false); @@ -302,7 +320,10 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) new_vma.is_exec = false; new_vma.phys_base = 0; - rasterizer->MapMemory(mapped_addr, size); + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } + return ORBIS_OK; } @@ -325,15 +346,34 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. - const auto& vma = FindVMA(mapped_addr)->second; - const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + auto vma = FindVMA(mapped_addr)->second; + size_t remaining_size = vma.base + vma.size - mapped_addr; + // There's a possible edge case where we're mapping to a partially reserved range. + // To account for this, unmap any reserved areas within this mapping range first. + auto unmap_addr = mapped_addr; + auto unmap_size = size; + while (!vma.IsMapped() && unmap_addr < mapped_addr + size && remaining_size < size) { + auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size); + unmap_addr += unmapped; + unmap_size -= unmapped; + vma = FindVMA(unmap_addr)->second; + } + + // This should return SCE_KERNEL_ERROR_ENOMEM but rarely happens. + vma = FindVMA(mapped_addr)->second; + remaining_size = vma.base + vma.size - mapped_addr; + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. 
if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Perform the mapping. @@ -353,7 +393,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M if (type == VMAType::Flexible) { flexible_usage += size; } - rasterizer->MapMemory(mapped_addr, size); + + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } return ORBIS_OK; } @@ -366,12 +409,18 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem // Find first free area to map the file. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size_aligned, 1); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } if (True(flags & MemoryMapFlags::Fixed)) { const auto& vma = FindVMA(virtual_addr)->second; const size_t remaining_size = vma.base + vma.size - virtual_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Map the file. @@ -404,7 +453,9 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { const auto start_in_vma = virtual_addr - vma_base_addr; const auto type = vma_base.type; - rasterizer->UnmapMemory(virtual_addr, size); + if (IsValidGpuMapping(virtual_addr, size)) { + rasterizer->UnmapMemory(virtual_addr, size); + } // Mark region as free and attempt to coalesce it with neighbours. 
const auto new_it = CarveVMA(virtual_addr, size); @@ -444,7 +495,10 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma if (type == VMAType::Flexible) { flexible_usage -= adjusted_size; } - rasterizer->UnmapMemory(virtual_addr, adjusted_size); + + if (IsValidGpuMapping(virtual_addr, adjusted_size)) { + rasterizer->UnmapMemory(virtual_addr, adjusted_size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, adjusted_size); @@ -471,6 +525,8 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) { do { auto it = FindVMA(virtual_addr + unmapped_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr + unmapped_bytes, 0), + "Address {:#x} is out of bounds", virtual_addr + unmapped_bytes); auto unmapped = UnmapBytesFromEntry(virtual_addr + unmapped_bytes, vma_base, size - unmapped_bytes); ASSERT_MSG(unmapped > 0, "Failed to unmap memory, progress is impossible"); @@ -485,7 +541,10 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr const auto it = FindVMA(addr); const auto& vma = it->second; - ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped"); + if (!vma.Contains(addr, 0) || vma.IsFree()) { + LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr); + return ORBIS_KERNEL_ERROR_EACCES; + } if (start != nullptr) { *start = reinterpret_cast(vma.base); @@ -555,6 +614,8 @@ s32 MemoryManager::Protect(VAddr addr, size_t size, MemoryProt prot) { do { auto it = FindVMA(addr + protected_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(addr + protected_bytes, 0), "Address {:#x} is out of bounds", + addr + protected_bytes); auto result = 0; result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot); if (result < 0) { @@ -571,8 +632,16 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, ::Libraries::Kernel::OrbisVirtualQueryInfo* info) { 
std::scoped_lock lk{mutex}; - auto it = FindVMA(addr); - if (it->second.type == VMAType::Free && flags == 1) { + // FindVMA on addresses before the vma_map return garbage data. + auto query_addr = + addr < impl.SystemManagedVirtualBase() ? impl.SystemManagedVirtualBase() : addr; + if (addr < query_addr && flags == 0) { + LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); + return ORBIS_KERNEL_ERROR_EACCES; + } + auto it = FindVMA(query_addr); + + while (it->second.type == VMAType::Free && flags == 1 && it != --vma_map.end()) { ++it; } if (it->second.type == VMAType::Free) { @@ -585,15 +654,17 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->end = vma.base + vma.size; info->offset = vma.phys_base; info->protection = static_cast(vma.prot); - info->is_flexible.Assign(vma.type == VMAType::Flexible); - info->is_direct.Assign(vma.type == VMAType::Direct); - info->is_stack.Assign(vma.type == VMAType::Stack); - info->is_pooled.Assign(vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled); - info->is_committed.Assign(vma.IsMapped()); - vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); + info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0; + info->is_direct = vma.type == VMAType::Direct ? 1 : 0; + info->is_stack = vma.type == VMAType::Stack ? 1 : 0; + info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0; + info->is_committed = vma.IsMapped() ? 
1 : 0; + + strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH); + if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); - ASSERT(dmem_it != dmem_map.end()); + ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!"); info->memory_type = dmem_it->second.memory_type; } else { info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION; @@ -607,11 +678,11 @@ int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, std::scoped_lock lk{mutex}; auto dmem_area = FindDmemArea(addr); - while (dmem_area != dmem_map.end() && dmem_area->second.is_free && find_next) { + while (dmem_area != --dmem_map.end() && dmem_area->second.is_free && find_next) { dmem_area++; } - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to query!"); return ORBIS_KERNEL_ERROR_EACCES; } @@ -691,36 +762,56 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) virtual_addr = min_search_address; } + // If the requested address is beyond the maximum our code can handle, throw an assert + auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize(); + ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds", + virtual_addr); + auto it = FindVMA(virtual_addr); - ASSERT_MSG(it != vma_map.end(), "Specified mapping address was not found!"); // If the VMA is free and contains the requested mapping we are done. if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) { return virtual_addr; } + // Search for the first free VMA that fits our mapping. 
- const auto is_suitable = [&] { + while (it != vma_map.end()) { if (!it->second.IsFree()) { - return false; + it++; + continue; } + const auto& vma = it->second; virtual_addr = Common::AlignUp(vma.base, alignment); // Sometimes the alignment itself might be larger than the VMA. if (virtual_addr > vma.base + vma.size) { - return false; + it++; + continue; } + + // Make sure the address is within our defined bounds + if (virtual_addr >= max_search_address) { + // There are no free mappings within our safely usable address space. + break; + } + + // If there's enough space in the VMA, return the address. const size_t remaining_size = vma.base + vma.size - virtual_addr; - return remaining_size >= size; - }; - while (!is_suitable()) { - ++it; + if (remaining_size >= size) { + return virtual_addr; + } + it++; } - return virtual_addr; + + // Couldn't find a suitable VMA, return an error. + LOG_ERROR(Kernel_Vmm, "Couldn't find a free mapping for address {:#x}, size {:#x}", + virtual_addr, size); + return -1; } MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); - ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); + ASSERT_MSG(vma_handle->second.Contains(virtual_addr, 0), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region"); @@ -749,7 +840,7 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); - ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); + ASSERT_MSG(addr <= dmem_handle->second.GetEnd(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); @@ -804,7 +895,7 @@ int 
MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, auto dmem_area = FindDmemArea(addr); - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (addr > dmem_area->second.GetEnd() || dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to check type!"); return ORBIS_KERNEL_ERROR_ENOENT; } diff --git a/src/core/memory.h b/src/core/memory.h index a6a55e288..3a204eb96 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -157,6 +157,12 @@ public: return impl.SystemReservedVirtualBase(); } + bool IsValidGpuMapping(VAddr virtual_addr, u64 size) { + // The PS4's GPU can only handle 40 bit addresses. + const VAddr max_gpu_address{0x10000000000}; + return virtual_addr + size < max_gpu_address; + } + bool IsValidAddress(const void* addr) const noexcept { const VAddr virtual_addr = reinterpret_cast(addr); const auto end_it = std::prev(vma_map.end()); @@ -186,7 +192,7 @@ public: int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot); int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, - MemoryMapFlags flags, VMAType type, std::string_view name = "", + MemoryMapFlags flags, VMAType type, std::string_view name = "anon", bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0); int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,