From efa7093f343c68b474535d1aa70d5ec921dc4e53 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Wed, 2 Jul 2025 15:54:14 +0200
Subject: [PATCH 1/5] Use shared_first_mutex (#3179)

---
 CMakeLists.txt                      |  1 +
 src/common/shared_first_mutex.h     | 46 +++++++++++++++++++
 .../renderer_vulkan/vk_rasterizer.h |  3 +-
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 src/common/shared_first_mutex.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 466933608..38532760d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -689,6 +689,7 @@ set(COMMON src/common/logging/backend.cpp
     src/common/recursive_lock.cpp
     src/common/recursive_lock.h
     src/common/sha1.h
+    src/common/shared_first_mutex.h
     src/common/signal_context.h
     src/common/signal_context.cpp
     src/common/singleton.h
diff --git a/src/common/shared_first_mutex.h b/src/common/shared_first_mutex.h
new file mode 100644
index 000000000..b150c956b
--- /dev/null
+++ b/src/common/shared_first_mutex.h
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+namespace Common {
+
+// Like std::shared_mutex, but readers have priority over writers.
+class SharedFirstMutex {
+public:
+    void lock() {
+        std::unique_lock lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active && readers == 0; });
+        writer_active = true;
+    }
+
+    void unlock() {
+        std::lock_guard lock(mtx);
+        writer_active = false;
+        cv.notify_all();
+    }
+
+    void lock_shared() {
+        std::unique_lock lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active; });
+        ++readers;
+    }
+
+    void unlock_shared() {
+        std::lock_guard lock(mtx);
+        if (--readers == 0) {
+            cv.notify_all();
+        }
+    }
+
+private:
+    std::mutex mtx;
+    std::condition_variable cv;
+    int readers = 0;
+    bool writer_active = false;
+};
+
+} // namespace Common
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c570ea368..4a978746c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -5,6 +5,7 @@
 
 #include <shared_mutex>
 #include "common/recursive_lock.h"
+#include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -122,7 +123,7 @@ private:
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     boost::icl::interval_set<VAddr> mapped_ranges;
-    std::shared_mutex mapped_ranges_mutex;
+    Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;
 
     boost::container::static_vector<

From 9eae6b57ceb27cbc009740e5335130665d3aa333 Mon Sep 17 00:00:00 2001
From: nickci2002 <58965309+nickci2002@users.noreply.github.com>
Date: Wed, 2 Jul 2025 12:22:30 -0400
Subject: [PATCH 2/5] V_CMP_EQ_U64 support (#3153)

* Added V_CMP_EQ_U64 shader opcode support and added 64-bit relational
  operators (<, >, <=, >=)
* Fixed clang-format crying because I typed xargs clang-format instead of
  xargs clang-format-19
* Replaced the V_CMP_EQ_U64 code to match V_CMP_U32 for testing
* Updated V_CMP_U64 for future additions

---
 .../backend/spirv/emit_spirv_instructions.h | 18 ++++---
 .../backend/spirv/emit_spirv_integer.cpp    | 36 ++++++++++---
 .../frontend/translate/translate.h          |  4 +-
 .../frontend/translate/vector_alu.cpp       | 53 +++++++++----------
 src/shader_recompiler/ir/ir_emitter.cpp     | 44 ++++++++++++---
 src/shader_recompiler/ir/ir_emitter.h       |  6 +--
 src/shader_recompiler/ir/opcodes.inc        | 18 ++++---
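A note on the SharedFirstMutex introduced in patch 1: it provides the blocking
halves of the standard Lockable and SharedLockable interfaces (lock/unlock and
lock_shared/unlock_shared), so std::unique_lock and std::shared_lock work with
it unchanged. A minimal usage sketch with hypothetical reader/writer functions
(note there is no try_lock_shared, so only the blocking std::shared_lock
constructor applies):

    #include <mutex>        // std::unique_lock
    #include <shared_mutex> // std::shared_lock
    #include "common/shared_first_mutex.h"

    Common::SharedFirstMutex ranges_mutex;

    void read_ranges() {
        // lock_shared() waits only while a writer holds the mutex, so readers
        // are never queued behind a writer that is merely waiting.
        std::shared_lock lock(ranges_mutex);
        // ... read the protected state ...
    }

    void write_ranges() {
        // lock() waits until no writer is active and all readers have left.
        std::unique_lock lock(ranges_mutex);
        // ... mutate the protected state ...
    }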
.../ir/passes/constant_propagation_pass.cpp | 30 ++++++++--- 8 files changed, 145 insertions(+), 64 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 15a8fd99b..08ea2c1cd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -406,14 +406,20 @@ Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 1a995354d..ddc1e7574 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -371,19 +371,35 @@ Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpIEqual(ctx.U1[1], lhs, rhs); } -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs); } -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs); } -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs); } -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs); } @@ -395,11 
+411,19 @@ Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpINotEqual(ctx.U1[1], lhs, rhs); } -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs); } -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index ece334bcd..b5bfec344 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -20,7 +20,7 @@ namespace Shader::Gcn { enum class ConditionOp : u32 { F, EQ, - LG, + LG, // NE GT, GE, LT, @@ -230,7 +230,7 @@ public: // VOPC void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); - void V_CMP_NE_U64(const GcnInst& inst); + void V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); void V_CMP_CLASS_F32(const GcnInst& inst); // VOP3a diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3b88e4dec..448622f0e 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -327,8 +327,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_U32(ConditionOp::TRU, false, true, inst); // V_CMP_{OP8}_U64 + case Opcode::V_CMP_EQ_U64: + return V_CMP_U64(ConditionOp::EQ, false, false, inst); case Opcode::V_CMP_NE_U64: - return V_CMP_NE_U64(inst); + return V_CMP_U64(ConditionOp::LG, false, false, inst); case Opcode::V_CMP_CLASS_F32: return V_CMP_CLASS_F32(inst); @@ -996,39 +998,32 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const } } -void Translator::V_CMP_NE_U64(const GcnInst& inst) { - const auto get_src = [&](const InstOperand& operand) { - switch (operand.field) { - case OperandField::VccLo: - return ir.GetVcc(); - case OperandField::ExecLo: - return ir.GetExec(); - case OperandField::ScalarGPR: - return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); - case OperandField::ConstZero: - return ir.Imm1(false); +void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { + const IR::U64 src0{GetSrc64(inst.src[0])}; + const IR::U64 src1{GetSrc64(inst.src[1])}; + const IR::U1 result = [&] { + switch (op) { + case ConditionOp::EQ: + return ir.IEqual(src0, src1); + case ConditionOp::LG: // NE + return ir.INotEqual(src0, src1); default: - UNREACHABLE(); + UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op)); } - }; - const IR::U1 src0{get_src(inst.src[0])}; - auto op = [&inst, this](auto x) { - switch (inst.src[1].field) { - case OperandField::ConstZero: - return x; - case OperandField::SignedConstIntNeg: - return ir.LogicalNot(x); - default: - UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field)); - } - }; + }(); + + if (is_signed) { + UNREACHABLE_MSG("V_CMP_U64 with signed integers is not 
supported"); + } + if (set_exec) { + UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported"); + } + switch (inst.dst[1].field) { case OperandField::VccLo: - ir.SetVcc(op(src0)); - break; + return ir.SetVcc(result); case OperandField::ScalarGPR: - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0)); - break; + return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 2497864c0..1f30a9565 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1712,12 +1712,32 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { } } -U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +U1 IREmitter::ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SLessThanEqual32 : Opcode::ULessThanEqual32, lhs, rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SLessThanEqual64 : Opcode::ULessThanEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } } -U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +U1 IREmitter::IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SGreaterThan32 : Opcode::UGreaterThan32, lhs, rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SGreaterThan64 : Opcode::UGreaterThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) { @@ -1734,8 +1754,20 @@ U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) { } } -U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +U1 IREmitter::IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SGreaterThanEqual32 : Opcode::UGreaterThanEqual32, lhs, + rhs); + case Type::U64: + return Inst(is_signed ? 
Opcode::SGreaterThanEqual64 : Opcode::UGreaterThanEqual64, lhs, + rhs); + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::LogicalOr(const U1& a, const U1& b) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 9e2f79978..119e3752e 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -299,10 +299,10 @@ public: [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); - [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); - [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 INotEqual(const U32U64& lhs, const U32U64& rhs); - [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 7fc514de9..d177017f2 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -382,14 +382,20 @@ OPCODE(ULessThan32, U1, U32, OPCODE(ULessThan64, U1, U64, U64, ) OPCODE(IEqual32, U1, U32, U32, ) OPCODE(IEqual64, U1, U64, U64, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(SLessThanEqual32, U1, U32, U32, ) +OPCODE(SLessThanEqual64, U1, U64, U64, ) +OPCODE(ULessThanEqual32, U1, U32, U32, ) +OPCODE(ULessThanEqual64, U1, U64, U64, ) +OPCODE(SGreaterThan32, U1, U32, U32, ) +OPCODE(SGreaterThan64, U1, U64, U64, ) +OPCODE(UGreaterThan32, U1, U32, U32, ) +OPCODE(UGreaterThan64, U1, U64, U64, ) OPCODE(INotEqual32, U1, U32, U32, ) OPCODE(INotEqual64, U1, U64, U64, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual32, U1, U32, U32, ) +OPCODE(SGreaterThanEqual64, U1, U64, U64, ) +OPCODE(UGreaterThanEqual32, U1, U32, U32, ) +OPCODE(UGreaterThanEqual64, U1, U64, U64, ) // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 5c66b1115..2a39d3a2e 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -381,24 +381,42 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ULessThan64: FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; }); return; - case IR::Opcode::SLessThanEqual: + case IR::Opcode::SLessThanEqual32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); return; - case IR::Opcode::ULessThanEqual: + case IR::Opcode::SLessThanEqual64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a <= b; }); + return; + case IR::Opcode::ULessThanEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); return; - case IR::Opcode::SGreaterThan: + case IR::Opcode::ULessThanEqual64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a <= b; }); + return; + case 
IR::Opcode::SGreaterThan32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); return; - case IR::Opcode::UGreaterThan: + case IR::Opcode::SGreaterThan64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a > b; }); + return; + case IR::Opcode::UGreaterThan32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); return; - case IR::Opcode::SGreaterThanEqual: + case IR::Opcode::UGreaterThan64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a > b; }); + return; + case IR::Opcode::SGreaterThanEqual32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); return; - case IR::Opcode::UGreaterThanEqual: + case IR::Opcode::SGreaterThanEqual64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a >= b; }); + return; + case IR::Opcode::UGreaterThanEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); return; + case IR::Opcode::UGreaterThanEqual64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a >= b; }); + return; case IR::Opcode::IEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); return; From 7431b3000532d82107c624899a263b8f427f8280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Thu, 3 Jul 2025 03:13:07 +0200 Subject: [PATCH 3/5] Support for BUFFER_ATOMIC_S/UMIN_X2 (#3182) * Fix BufferAtomicS/UMax64 SPIR-V emitting * Support for BUFFER_ATOMIC_S/UMIN_X2 --- .../backend/spirv/emit_spirv_atomic.cpp | 8 ++++++++ .../backend/spirv/emit_spirv_instructions.h | 2 ++ .../frontend/translate/vector_memory.cpp | 4 ++++ src/shader_recompiler/ir/ir_emitter.cpp | 12 ++++++++++-- src/shader_recompiler/ir/microinstruction.cpp | 2 ++ src/shader_recompiler/ir/opcodes.inc | 2 ++ .../ir/passes/resource_tracking_pass.cpp | 10 ++++++++++ .../ir/passes/shader_info_collection_pass.cpp | 2 ++ 8 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 85e93f3fb..e37acb2e4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -200,10 +200,18 @@ Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); } +Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); +} + Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin); } +Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin); +} + Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { if (ctx.profile.supports_buffer_fp32_atomic_min_max) { return BufferAtomicU32(ctx, inst, handle, address, value, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 08ea2c1cd..1ac2266bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -91,7 +91,9 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre 
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 8dcf70a07..91f545cfd 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -74,8 +74,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst); case Opcode::BUFFER_ATOMIC_SMIN: return BUFFER_ATOMIC(AtomicOp::Smin, inst); + case Opcode::BUFFER_ATOMIC_SMIN_X2: + return BUFFER_ATOMIC(AtomicOp::Smin, inst); case Opcode::BUFFER_ATOMIC_UMIN: return BUFFER_ATOMIC(AtomicOp::Umin, inst); + case Opcode::BUFFER_ATOMIC_UMIN_X2: + return BUFFER_ATOMIC(AtomicOp::Umin, inst); case Opcode::BUFFER_ATOMIC_SMAX: return BUFFER_ATOMIC(AtomicOp::Smax, inst); case Opcode::BUFFER_ATOMIC_SMAX_X2: diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 1f30a9565..3d64cc5da 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -500,8 +500,16 @@ Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, con Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value, bool is_signed, BufferInstInfo info) { - return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value) - : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value); + switch (value.Type()) { + case Type::U32: + return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value) + : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value); + case Type::U64: + return is_signed ? 
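        // The _X2 buffer atomics carry 64-bit values, so the U64 case here
        // dispatches to the new BufferAtomicSMin64/BufferAtomicUMin64 opcodes.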
Inst(Opcode::BufferAtomicSMin64, Flags{info}, handle, address, value) + : Inst(Opcode::BufferAtomicUMin64, Flags{info}, handle, address, value); + default: + ThrowInvalidType(value.Type()); + } } Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value, diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 8d46a0071..84bdb5739 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -70,7 +70,9 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::BufferAtomicIAdd64: case Opcode::BufferAtomicISub32: case Opcode::BufferAtomicSMin32: + case Opcode::BufferAtomicSMin64: case Opcode::BufferAtomicUMin32: + case Opcode::BufferAtomicUMin64: case Opcode::BufferAtomicFMin32: case Opcode::BufferAtomicSMax32: case Opcode::BufferAtomicSMax64: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index d177017f2..008f44659 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -124,7 +124,9 @@ OPCODE(BufferAtomicIAdd32, U32, Opaq OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 ) OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMin64, U64, Opaque, Opaque, U64 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMin64, U64, Opaque, Opaque, U64 ) OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index ffb785584..d5d140c93 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -20,7 +20,9 @@ bool IsBufferAtomic(const IR::Inst& inst) { case IR::Opcode::BufferAtomicIAdd64: case IR::Opcode::BufferAtomicISub32: case IR::Opcode::BufferAtomicSMin32: + case IR::Opcode::BufferAtomicSMin64: case IR::Opcode::BufferAtomicUMin32: + case IR::Opcode::BufferAtomicUMin64: case IR::Opcode::BufferAtomicFMin32: case IR::Opcode::BufferAtomicSMax32: case IR::Opcode::BufferAtomicSMax64: @@ -97,6 +99,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferU64: case IR::Opcode::StoreBufferU64: case IR::Opcode::BufferAtomicIAdd64: + case IR::Opcode::BufferAtomicSMax64: + case IR::Opcode::BufferAtomicSMin64: + case IR::Opcode::BufferAtomicUMax64: + case IR::Opcode::BufferAtomicUMin64: return IR::Type::U64; case IR::Opcode::LoadBufferFormatF32: case IR::Opcode::StoreBufferFormatF32: @@ -118,6 +124,10 @@ u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) { case IR::Opcode::LoadBufferU64: case IR::Opcode::StoreBufferU64: case IR::Opcode::BufferAtomicIAdd64: + case IR::Opcode::BufferAtomicSMax64: + case IR::Opcode::BufferAtomicSMin64: + case IR::Opcode::BufferAtomicUMax64: + case IR::Opcode::BufferAtomicUMin64: return 3; case IR::Opcode::LoadBufferFormatF32: case IR::Opcode::StoreBufferFormatF32: { diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 59668870b..472ff7678 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ 
b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -102,7 +102,9 @@ void Visit(Info& info, const IR::Inst& inst) {
         break;
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         info.uses_buffer_int64_atomics = true;
         break;
     case IR::Opcode::LaneId:

From 48460d1cbe550b224b463cbcb8c0c7324c74b77e Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Thu, 3 Jul 2025 13:19:38 +0300
Subject: [PATCH 4/5] vector_alu: Improve handling of mbcnt append/consume
 patterns (#3184)

* vector_alu: Improve handling of mbcnt append/consume patterns

The existing implementation was written to handle a single mbcnt pattern
appearing before the DS_APPEND instruction:

  v_mbcnt_hi_u32_b32 vX, exec_hi, 0
  v_mbcnt_lo_u32_b32 vX, exec_lo, vX
  ds_append vY offset:4 gds
  v_add_i32 vX, vcc, vY, vX

In this case, however, the DS_APPEND comes before the mbcnt pattern:

  ds_append vX gds
  v_mbcnt_hi_u32_b32 vY, exec_hi, vX
  v_mbcnt_lo_u32_b32 vZ, exec_lo, vY

The mbcnt instructions always come in hi/lo pairs and are in general quite
flexible, but they assume the subgroup size is 64, so they are not recompiled
literally. Together with DS_APPEND they are used to derive a unique per-thread
index into a buffer (as opposed to using the thread id, whose ordering could
be random). The DS_APPEND instruction works at the subgroup level: it adds the
number of active threads in the subgroup to the GDS counter, essentially
giving all threads a multiple-of-64 base index. Each thread then executes the
mbcnt pair, which returns the number of active threads with an id lower than
its own and adds that count to the base (a sketch of this computation follows
below).

The recompiler translates DS_APPEND into an atomic increment of a storage
buffer counter, which already gives the desired unique index, so this pattern
is a no-op. On main it was set to zero, as per the first pattern, to avoid
altering the DS_APPEND result. The new handling passes through the initial
value of the pattern instead, which has the same effect but works in either
case.
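As a sketch of the index computation described above (a host-side model for
illustration only, not emulator code; the function name is made up), the mbcnt
hi/lo pair reduces to a popcount of the exec-mask bits below the current lane,
added on top of the DS_APPEND base:

    #include <bit>
    #include <cstdint>

    // Models ds_append followed by v_mbcnt_hi/lo on a 64-wide subgroup:
    // 'exec' is the active-lane mask, 'base' the value DS_APPEND returned.
    uint64_t append_index(uint64_t exec, unsigned lane, uint64_t base) {
        const uint64_t below = (lane == 0) ? 0 : (exec & (~uint64_t{0} >> (64 - lane)));
        return base + std::popcount(below); // active lanes with id < lane
    }

Since the recompiled DS_APPEND already hands every thread a unique index, the
popcount term contributes nothing new, and forwarding the pattern's initial
value preserves the result under both instruction orderings.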
* vk_rasterizer: Always sync DMA buffers --- .../frontend/translate/vector_alu.cpp | 26 +++++++++++-------- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 448622f0e..54f1088f2 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -558,27 +558,31 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) { void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { if (!is_low) { - // v_mbcnt_hi_u32_b32 v2, -1, 0 + // v_mbcnt_hi_u32_b32 vX, -1, 0 if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 && inst.src[1].field == OperandField::ConstZero) { return; } - // v_mbcnt_hi_u32_b32 vX, exec_hi, 0 - if (inst.src[0].field == OperandField::ExecHi && - inst.src[1].field == OperandField::ConstZero) { - return; + // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ + if ((inst.src[0].field == OperandField::ExecHi || + inst.src[0].field == OperandField::VccHi) && + (inst.src[1].field == OperandField::ConstZero || + inst.src[1].field == OperandField::VectorGPR)) { + return SetDst(inst.dst[0], GetSrc(inst.src[1])); } + UNREACHABLE(); } else { - // v_mbcnt_lo_u32_b32 v2, -1, vX + // v_mbcnt_lo_u32_b32 vY, -1, vX // used combined with above to fetch lane id in non-compute stages if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) { - SetDst(inst.dst[0], ir.LaneId()); + return SetDst(inst.dst[0], ir.LaneId()); } - // v_mbcnt_lo_u32_b32 v20, exec_lo, vX - // used combined in above for append buffer indexing. - if (inst.src[0].field == OperandField::ExecLo) { - SetDst(inst.dst[0], ir.Imm32(0)); + // v_mbcnt_lo_u32_b32 vY, exec_lo, vX + // used combined with above for append buffer indexing. + if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) { + return SetDst(inst.dst[0], GetSrc(inst.src[1])); } + UNREACHABLE(); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0aad0f047..514de1743 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -471,7 +471,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { uses_dma |= stage->uses_dma; } - if (uses_dma && !fault_process_pending) { + if (uses_dma) { // We only use fault buffer for DMA right now. 
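        // With the fault_process_pending check removed, mapped ranges are now
        // synchronized on every submission whose stages use DMA ("always sync
        // DMA buffers"), not only while no fault processing is pending.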
{ Common::RecursiveSharedLock lock{mapped_ranges_mutex}; From df22c4225ed898a44cb1af82cef845fc2f7f34bb Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Thu, 3 Jul 2025 20:03:06 +0300 Subject: [PATCH 5/5] config: Add toggle for DMA (#3185) * config: Add toggle for DMA * config: Log new config --- src/common/config.cpp | 11 ++++++++++ src/common/config.h | 2 ++ src/emulator.cpp | 1 + .../spirv/emit_spirv_context_get_set.cpp | 4 ++++ .../backend/spirv/spirv_emit_context.cpp | 22 +------------------ .../backend/spirv/spirv_emit_context.h | 10 +++++++++ src/shader_recompiler/ir/passes/ir_passes.h | 2 +- .../ir/passes/shader_info_collection_pass.cpp | 22 ++++++++++++++++++- src/shader_recompiler/recompiler.cpp | 2 +- 9 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 0b5f11200..4a764a4c6 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -52,6 +52,7 @@ static std::string isSideTrophy = "right"; static bool isNullGpu = false; static bool shouldCopyGPUBuffers = false; static bool readbacksEnabled = false; +static bool directMemoryAccessEnabled = false; static bool shouldDumpShaders = false; static bool shouldPatchShaders = true; static u32 vblankDivider = 1; @@ -245,6 +246,10 @@ bool readbacks() { return readbacksEnabled; } +bool directMemoryAccess() { + return directMemoryAccessEnabled; +} + bool dumpShaders() { return shouldDumpShaders; } @@ -353,6 +358,10 @@ void setReadbacks(bool enable) { readbacksEnabled = enable; } +void setDirectMemoryAccess(bool enable) { + directMemoryAccessEnabled = enable; +} + void setDumpShaders(bool enable) { shouldDumpShaders = enable; } @@ -596,6 +605,7 @@ void load(const std::filesystem::path& path) { isNullGpu = toml::find_or(gpu, "nullGpu", false); shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", false); readbacksEnabled = toml::find_or(gpu, "readbacks", false); + directMemoryAccessEnabled = toml::find_or(gpu, "directMemoryAccess", false); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); shouldPatchShaders = toml::find_or(gpu, "patchShaders", true); vblankDivider = toml::find_or(gpu, "vblankDivider", 1); @@ -746,6 +756,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; data["GPU"]["readbacks"] = readbacksEnabled; + data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["patchShaders"] = shouldPatchShaders; data["GPU"]["vblankDivider"] = vblankDivider; diff --git a/src/common/config.h b/src/common/config.h index 219461e7e..931fa68e2 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -47,6 +47,8 @@ bool copyGPUCmdBuffers(); void setCopyGPUCmdBuffers(bool enable); bool readbacks(); void setReadbacks(bool enable); +bool directMemoryAccess(); +void setDirectMemoryAccess(bool enable); bool dumpShaders(); void setDumpShaders(bool enable); u32 vblankDiv(); diff --git a/src/emulator.cpp b/src/emulator.cpp index d6d523fa0..fbab5929b 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector ar LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks()); + LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess()); LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); 
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 564fb3f80..f3a8c518c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "common/logging/log.h" #include "shader_recompiler/backend/spirv/emit_spirv_bounds.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" @@ -167,6 +168,9 @@ using PointerSize = EmitContext::PointerSize; Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) { const u32 flatbuf_off_dw = inst->Flags(); + if (!Config::directMemoryAccess()) { + return ctx.EmitFlatbufferLoad(ctx.ConstU32(flatbuf_off_dw)); + } // We can only provide a fallback for immediate offsets. if (flatbuf_off_dw == 0) { return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 524914ad4..77336c9ec 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -784,19 +784,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte }; void EmitContext::DefineBuffers() { - if (!profile.supports_robust_buffer_access && !info.uses_dma) { - // In case Flatbuf has not already been bound by IR and is needed - // to query buffer sizes, bind it now. - info.buffers.push_back({ - .used_types = IR::Type::U32, - // We can't guarantee that flatbuf will not grow past UBO - // limit if there are a lot of ReadConsts. (We could specialize) - .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits::max()), - .buffer_type = BufferType::Flatbuf, - }); - // In the future we may want to read buffer sizes from GPU memory if available. 
- // info.readconst_types |= Info::ReadConstType::Immediate; - } for (const auto& desc : info.buffers) { const auto buf_sharp = desc.GetSharp(info); const bool is_storage = desc.IsStorage(buf_sharp, profile); @@ -1219,14 +1206,7 @@ Id EmitContext::DefineReadConst(bool dynamic) { if (dynamic) { return u32_zero_value; } else { - const auto& flatbuf_buffer{buffers[flatbuf_index]}; - ASSERT(flatbuf_buffer.binding >= 0 && - flatbuf_buffer.buffer_type == BufferType::Flatbuf); - const auto [flatbuf_buffer_id, flatbuf_pointer_type] = - flatbuf_buffer.Alias(PointerType::U32); - const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, - flatbuf_offset)}; - return OpLoad(U32[1], ptr); + return EmitFlatbufferLoad(flatbuf_offset); } }); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index f8c6416e8..28e9099d8 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -180,6 +180,16 @@ public: return OpAccessChain(result_type, shared_mem, index); } + Id EmitFlatbufferLoad(Id flatbuf_offset) { + const auto& flatbuf_buffer{buffers[flatbuf_index]}; + ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf); + const auto [flatbuf_buffer_id, flatbuf_pointer_type] = + flatbuf_buffer.aliases[u32(PointerType::U32)]; + const auto ptr{ + OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, flatbuf_offset)}; + return OpLoad(U32[1], ptr); + } + Info& info; const RuntimeInfo& runtime_info; const Profile& profile; diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 57d36f6df..fdae9d3cf 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -19,7 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program); void FlattenExtendedUserdataPass(IR::Program& program); void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); -void CollectShaderInfoPass(IR::Program& program); +void CollectShaderInfoPass(IR::Program& program, const Profile& profile); void LowerBufferFormatToRaw(IR::Program& program); void LowerFp64ToFp32(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 472ff7678..a87dceb0a 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" #include "shader_recompiler/ir/program.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -138,7 +139,7 @@ void Visit(Info& info, const IR::Inst& inst) { } } -void CollectShaderInfoPass(IR::Program& program) { +void CollectShaderInfoPass(IR::Program& program, const Profile& profile) { auto& info = program.info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -146,6 +147,25 @@ void CollectShaderInfoPass(IR::Program& program) { } } + // In case Flatbuf has not already been bound by IR and is needed + // to query buffer sizes, bind it now. 
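    // (This fallback was moved here from EmitContext::DefineBuffers, whose
    // removal appears in the spirv_emit_context.cpp hunk above; binding the
    // Flatbuf during info collection lets the SPIR-V backend treat it as an
    // ordinary entry in info.buffers.)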
+    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            // We can't guarantee that flatbuf will not grow past UBO
+            // limit if there are a lot of ReadConsts. (We could specialize)
+            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
+            .buffer_type = BufferType::Flatbuf,
+        });
+        // In the future we may want to read buffer sizes from GPU memory if available.
+        // info.readconst_types |= Info::ReadConstType::Immediate;
+    }
+
+    if (!Config::directMemoryAccess()) {
+        info.uses_dma = false;
+        info.readconst_types = Info::ReadConstType::None;
+    }
+
     if (info.uses_dma) {
         info.buffers.push_back({
             .used_types = IR::Type::U64,
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index e17fb1c9e..2da9e7b01 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -84,7 +84,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
-    Shader::Optimization::CollectShaderInfoPass(program);
+    Shader::Optimization::CollectShaderInfoPass(program, profile);
     Shader::IR::DumpProgram(program, info);
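The new toggle ties patch 5 together: with directMemoryAccess disabled,
CollectShaderInfoPass clears info.uses_dma and info.readconst_types, and
EmitReadConst falls back to EmitFlatbufferLoad instead of reading guest memory
directly. A minimal excerpt of the settings file this maps to (the [GPU] table
and key names come from the config.cpp diff above; the surrounding file layout
is assumed):

    [GPU]
    readbacks = false
    directMemoryAccess = false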