diff --git a/CMakeLists.txt b/CMakeLists.txt
index 466933608..38532760d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -689,6 +689,7 @@ set(COMMON src/common/logging/backend.cpp
           src/common/recursive_lock.cpp
           src/common/recursive_lock.h
           src/common/sha1.h
+          src/common/shared_first_mutex.h
           src/common/signal_context.h
           src/common/signal_context.cpp
           src/common/singleton.h
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 462dd05c7..eded244d6 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -65,6 +65,7 @@ static u32 screenHeight = 720;
 static bool isNullGpu = false;
 static bool shouldCopyGPUBuffers = false;
 static bool readbacksEnabled = false;
+static bool directMemoryAccessEnabled = false;
 static bool shouldDumpShaders = false;
 static bool shouldPatchShaders = false;
 static u32 vblankDivider = 1;
@@ -102,7 +103,7 @@ u32 m_language = 1; // english
 static std::string trophyKey = "";
 // Expected number of items in the config file
-static constexpr u64 total_entries = 50;
+static constexpr u64 total_entries = 51;
 bool allowHDR() {
     return isHDRAllowed;
 }
@@ -261,6 +262,10 @@ bool readbacks() {
     return readbacksEnabled;
 }
+bool directMemoryAccess() {
+    return directMemoryAccessEnabled;
+}
+
 bool dumpShaders() {
     return shouldDumpShaders;
 }
@@ -369,6 +374,10 @@ void setReadbacks(bool enable) {
     readbacksEnabled = enable;
 }
+void setDirectMemoryAccess(bool enable) {
+    directMemoryAccessEnabled = enable;
+}
+
 void setDumpShaders(bool enable) {
     shouldDumpShaders = enable;
 }
@@ -622,6 +631,7 @@ void load(const std::filesystem::path& path) {
     isNullGpu = toml::find_or(gpu, "nullGpu", isNullGpu);
     shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", shouldCopyGPUBuffers);
     readbacksEnabled = toml::find_or(gpu, "readbacks", readbacksEnabled);
+    directMemoryAccessEnabled = toml::find_or(gpu, "directMemoryAccess", directMemoryAccessEnabled);
     shouldDumpShaders = toml::find_or(gpu, "dumpShaders", shouldDumpShaders);
     shouldPatchShaders = toml::find_or(gpu, "patchShaders", shouldPatchShaders);
     vblankDivider = toml::find_or(gpu, "vblankDivider", vblankDivider);
@@ -791,6 +801,7 @@ void save(const std::filesystem::path& path) {
     data["GPU"]["nullGpu"] = isNullGpu;
     data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
     data["GPU"]["readbacks"] = readbacksEnabled;
+    data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled;
     data["GPU"]["dumpShaders"] = shouldDumpShaders;
     data["GPU"]["patchShaders"] = shouldPatchShaders;
     data["GPU"]["vblankDivider"] = vblankDivider;
@@ -890,6 +901,7 @@ void setDefaultValues() {
     isNullGpu = false;
     shouldCopyGPUBuffers = false;
     readbacksEnabled = false;
+    directMemoryAccessEnabled = false;
     shouldDumpShaders = false;
     shouldPatchShaders = false;
     vblankDivider = 1;
diff --git a/src/common/config.h b/src/common/config.h
index 219461e7e..931fa68e2 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -47,6 +47,8 @@ bool copyGPUCmdBuffers();
 void setCopyGPUCmdBuffers(bool enable);
 bool readbacks();
 void setReadbacks(bool enable);
+bool directMemoryAccess();
+void setDirectMemoryAccess(bool enable);
 bool dumpShaders();
 void setDumpShaders(bool enable);
 u32 vblankDiv();
diff --git a/src/common/shared_first_mutex.h b/src/common/shared_first_mutex.h
new file mode 100644
index 000000000..b150c956b
--- /dev/null
+++ b/src/common/shared_first_mutex.h
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+namespace Common {
+
+// Like std::shared_mutex, but readers have priority over writers.
+class SharedFirstMutex {
+public:
+    void lock() {
+        std::unique_lock lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active && readers == 0; });
+        writer_active = true;
+    }
+
+    void unlock() {
+        std::lock_guard lock(mtx);
+        writer_active = false;
+        cv.notify_all();
+    }
+
+    void lock_shared() {
+        std::unique_lock lock(mtx);
+        cv.wait(lock, [this]() { return !writer_active; });
+        ++readers;
+    }
+
+    void unlock_shared() {
+        std::lock_guard lock(mtx);
+        if (--readers == 0) {
+            cv.notify_all();
+        }
+    }
+
+private:
+    std::mutex mtx;
+    std::condition_variable cv;
+    int readers = 0;
+    bool writer_active = false;
+};
+
+} // namespace Common
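SharedFirstMutex exposes the standard lock()/unlock()/lock_shared()/unlock_shared() interface, so it composes with the usual standard-library lock adaptors. A minimal usage sketch (hypothetical functions, not part of this patch):

#include <mutex>        // std::unique_lock
#include <shared_mutex> // std::shared_lock
#include "common/shared_first_mutex.h"

Common::SharedFirstMutex mtx;

void ReadSide() {
    std::shared_lock lock(mtx); // admitted even while a writer is waiting
    // ... read shared state ...
}

void WriteSide() {
    std::unique_lock lock(mtx); // waits until no reader or writer holds the mutex
    // ... mutate shared state ...
}

The cost of reader priority is that a steady stream of readers can starve a writer; that appears acceptable for its use in the rasterizer at the end of this patch, where shared acquisitions on the hot draw path vastly outnumber exclusive ones.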
diff --git a/src/emulator.cpp b/src/emulator.cpp
index d6d523fa0..fbab5929b 100644
--- a/src/emulator.cpp
+++ b/src/emulator.cpp
@@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
     LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
     LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
     LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
+    LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
     LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
     LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
     LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 85e93f3fb..e37acb2e4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -200,10 +200,18 @@ Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
 }
 
+Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
+}
+
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
 }
 
+Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
+}
+
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
         return BufferAtomicU32(ctx, inst, handle, address, value,
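Both new emitters delegate to the BufferAtomicU64 helper this file already uses for the IAdd64 and SMax64/UMax64 cases, selecting the SPIR-V instruction via a Sirit member-function pointer. For intuition about the semantics being emitted, a host-side analog of a 64-bit unsigned atomic minimum (illustration only; the GPU performs this on buffer memory):

#include <algorithm>
#include <atomic>
#include <cstdint>

// Returns the previous value, as OpAtomicUMin does; the SMin64 variant differs
// only in interpreting the same 64 bits as a signed int64_t for the comparison.
uint64_t AtomicUMin64(std::atomic<uint64_t>& word, uint64_t value) {
    uint64_t old = word.load();
    while (!word.compare_exchange_weak(old, std::min(old, value))) {
        // `old` was refreshed with the current contents; retry the exchange.
    }
    return old;
}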
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 564fb3f80..f3a8c518c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "common/assert.h"
+#include "common/config.h"
 #include "common/logging/log.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -167,6 +168,9 @@ using PointerSize = EmitContext::PointerSize;
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
+    if (!Config::directMemoryAccess()) {
+        return ctx.EmitFlatbufferLoad(ctx.ConstU32(flatbuf_off_dw));
+    }
     // We can only provide a fallback for immediate offsets.
     if (flatbuf_off_dw == 0) {
         return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
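With directMemoryAccess disabled, every ReadConst whose flat-buffer offset was resolved at recompile time is now served straight from the Flatbuf constant buffer, and only the DMA-enabled path still reaches the read_const_dynamic helper. Based on the EmitFlatbufferLoad helper added to spirv_emit_context.h further down in this patch, the fallback for an immediate offset of, say, 12 dwords reduces to the following sketch (member names are those of EmitContext; illustration only):

// Access chain into the Flatbuf binding at the immediate dword offset,
// followed by a single 32-bit load. flatbuf_buffer_id and flatbuf_pointer_type
// come from the Flatbuf buffer's U32 alias.
const Id ptr = ctx.OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id,
                                 ctx.u32_zero_value, ctx.ConstU32(12u));
const Id value = ctx.OpLoad(ctx.U32[1], ptr);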
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 15a8fd99b..1ac2266bd 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -91,7 +91,9 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicSMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicUMin64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -406,14 +408,20 @@ Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 1a995354d..ddc1e7574 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -371,19 +371,35 @@ Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs);
 }
 
@@ -395,11 +411,19 @@ Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpINotEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
 }
 
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs);
 }
 
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 524914ad4..77336c9ec 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -784,19 +784,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };
 
 void EmitContext::DefineBuffers() {
-    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
-        // In case Flatbuf has not already been bound by IR and is needed
-        // to query buffer sizes, bind it now.
-        info.buffers.push_back({
-            .used_types = IR::Type::U32,
-            // We can't guarantee that flatbuf will not grow past UBO
-            // limit if there are a lot of ReadConsts. (We could specialize)
-            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
-            .buffer_type = BufferType::Flatbuf,
-        });
-        // In the future we may want to read buffer sizes from GPU memory if available.
-        // info.readconst_types |= Info::ReadConstType::Immediate;
-    }
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
@@ -1219,14 +1206,7 @@ Id EmitContext::DefineReadConst(bool dynamic) {
         if (dynamic) {
             return u32_zero_value;
         } else {
-            const auto& flatbuf_buffer{buffers[flatbuf_index]};
-            ASSERT(flatbuf_buffer.binding >= 0 &&
-                   flatbuf_buffer.buffer_type == BufferType::Flatbuf);
-            const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
-                flatbuf_buffer.Alias(PointerType::U32);
-            const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
-                                         flatbuf_offset)};
-            return OpLoad(U32[1], ptr);
+            return EmitFlatbufferLoad(flatbuf_offset);
         }
     });
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index f8c6416e8..28e9099d8 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -180,6 +180,16 @@ public:
         return OpAccessChain(result_type, shared_mem, index);
     }
 
+    Id EmitFlatbufferLoad(Id flatbuf_offset) {
+        const auto& flatbuf_buffer{buffers[flatbuf_index]};
+        ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
+        const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
+            flatbuf_buffer.aliases[u32(PointerType::U32)];
+        const auto ptr{
+            OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, flatbuf_offset)};
+        return OpLoad(U32[1], ptr);
+    }
+
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index ece334bcd..b5bfec344 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -20,7 +20,7 @@ namespace Shader::Gcn {
 enum class ConditionOp : u32 {
     F,
     EQ,
-    LG,
+    LG, // NE
     GT,
     GE,
     LT,
@@ -230,7 +230,7 @@ public:
     // VOPC
     void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
     void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
-    void V_CMP_NE_U64(const GcnInst& inst);
+    void V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
     void V_CMP_CLASS_F32(const GcnInst& inst);
 
     // VOP3a
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 3b88e4dec..54f1088f2 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -327,8 +327,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_CMP_U32(ConditionOp::TRU, false, true, inst);
 
     // V_CMP_{OP8}_U64
+    case Opcode::V_CMP_EQ_U64:
+        return V_CMP_U64(ConditionOp::EQ, false, false, inst);
     case Opcode::V_CMP_NE_U64:
-        return V_CMP_NE_U64(inst);
+        return V_CMP_U64(ConditionOp::LG, false, false, inst);
 
     case Opcode::V_CMP_CLASS_F32:
         return V_CMP_CLASS_F32(inst);
@@ -556,27 +558,31 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
 
 void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
     if (!is_low) {
-        // v_mbcnt_hi_u32_b32 v2, -1, 0
+        // v_mbcnt_hi_u32_b32 vX, -1, 0
         if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
             inst.src[1].field == OperandField::ConstZero) {
             return;
         }
-        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0
-        if (inst.src[0].field == OperandField::ExecHi &&
-            inst.src[1].field == OperandField::ConstZero) {
-            return;
+        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
+        if ((inst.src[0].field == OperandField::ExecHi ||
+             inst.src[0].field == OperandField::VccHi) &&
+            (inst.src[1].field == OperandField::ConstZero ||
+             inst.src[1].field == OperandField::VectorGPR)) {
+            return SetDst(inst.dst[0], GetSrc(inst.src[1]));
         }
+        UNREACHABLE();
     } else {
-        // v_mbcnt_lo_u32_b32 v2, -1, vX
+        // v_mbcnt_lo_u32_b32 vY, -1, vX
         // used combined with above to fetch lane id in non-compute stages
         if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
-            SetDst(inst.dst[0], ir.LaneId());
+            return SetDst(inst.dst[0], ir.LaneId());
        }
-        // v_mbcnt_lo_u32_b32 v20, exec_lo, vX
-        // used combined in above for append buffer indexing.
-        if (inst.src[0].field == OperandField::ExecLo) {
-            SetDst(inst.dst[0], ir.Imm32(0));
+        // v_mbcnt_lo_u32_b32 vY, exec_lo, vX
+        // used combined with above for append buffer indexing.
+        if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
+            return SetDst(inst.dst[0], GetSrc(inst.src[1]));
         }
+        UNREACHABLE();
     }
 }
@@ -996,39 +1002,32 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const
     }
 }
 
-void Translator::V_CMP_NE_U64(const GcnInst& inst) {
-    const auto get_src = [&](const InstOperand& operand) {
-        switch (operand.field) {
-        case OperandField::VccLo:
-            return ir.GetVcc();
-        case OperandField::ExecLo:
-            return ir.GetExec();
-        case OperandField::ScalarGPR:
-            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
-        case OperandField::ConstZero:
-            return ir.Imm1(false);
+void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
+    const IR::U64 src0{GetSrc64(inst.src[0])};
+    const IR::U64 src1{GetSrc64(inst.src[1])};
+    const IR::U1 result = [&] {
+        switch (op) {
+        case ConditionOp::EQ:
+            return ir.IEqual(src0, src1);
+        case ConditionOp::LG: // NE
+            return ir.INotEqual(src0, src1);
         default:
-            UNREACHABLE();
+            UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op));
         }
-    };
-    const IR::U1 src0{get_src(inst.src[0])};
-    auto op = [&inst, this](auto x) {
-        switch (inst.src[1].field) {
-        case OperandField::ConstZero:
-            return x;
-        case OperandField::SignedConstIntNeg:
-            return ir.LogicalNot(x);
-        default:
-            UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field));
-        }
-    };
+    }();
+
+    if (is_signed) {
+        UNREACHABLE_MSG("V_CMP_U64 with signed integers is not supported");
+    }
+    if (set_exec) {
+        UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported");
+    }
+
     switch (inst.dst[1].field) {
     case OperandField::VccLo:
-        ir.SetVcc(op(src0));
-        break;
+        return ir.SetVcc(result);
     case OperandField::ScalarGPR:
-        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0));
-        break;
+        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
     default:
         UNREACHABLE();
     }
 }
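The folded patterns are a GCN idiom: v_mbcnt_lo_u32_b32 counts the set bits of the low half of a 64-bit mask below the current lane and adds an accumulator, and v_mbcnt_hi_u32_b32 does the same for the high half, so chaining the pair over an all-ones mask yields the lane index, which is why the translator lowers that pattern to ir.LaneId(). A host-side sketch of the combined computation (illustration only; assumes lane < 64):

#include <bit>
#include <cstdint>

// Set bits of `mask` strictly below `lane`. With mask == ~0ull the result is
// exactly `lane`, i.e. the thread's position within the 64-wide wavefront.
uint32_t Mbcnt64(uint64_t mask, uint32_t lane) {
    const uint64_t below = lane ? (mask & ((uint64_t{1} << lane) - 1)) : 0;
    return static_cast<uint32_t>(std::popcount(below));
}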
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 8dcf70a07..91f545cfd 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -74,8 +74,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
     case Opcode::BUFFER_ATOMIC_SMIN:
         return BUFFER_ATOMIC(AtomicOp::Smin, inst);
+    case Opcode::BUFFER_ATOMIC_SMIN_X2:
+        return BUFFER_ATOMIC(AtomicOp::Smin, inst);
     case Opcode::BUFFER_ATOMIC_UMIN:
         return BUFFER_ATOMIC(AtomicOp::Umin, inst);
+    case Opcode::BUFFER_ATOMIC_UMIN_X2:
+        return BUFFER_ATOMIC(AtomicOp::Umin, inst);
     case Opcode::BUFFER_ATOMIC_SMAX:
         return BUFFER_ATOMIC(AtomicOp::Smax, inst);
     case Opcode::BUFFER_ATOMIC_SMAX_X2:
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 2497864c0..3d64cc5da 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -500,8 +500,16 @@ Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, con
 
 Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
                                   bool is_signed, BufferInstInfo info) {
-    return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
-                     : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
+    switch (value.Type()) {
+    case Type::U32:
+        return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
+    case Type::U64:
+        return is_signed ? Inst(Opcode::BufferAtomicSMin64, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMin64, Flags{info}, handle, address, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
@@ -1712,12 +1720,32 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
     }
 }
 
-U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+U1 IREmitter::ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SLessThanEqual32 : Opcode::ULessThanEqual32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SLessThanEqual64 : Opcode::ULessThanEqual64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
-U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+U1 IREmitter::IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThan32 : Opcode::UGreaterThan32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThan64 : Opcode::UGreaterThan64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
@@ -1734,8 +1762,20 @@ U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) {
     }
 }
 
-U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
-    return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+U1 IREmitter::IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual32 : Opcode::UGreaterThanEqual32, lhs,
+                        rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual64 : Opcode::UGreaterThanEqual64, lhs,
+                        rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 9e2f79978..119e3752e 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -299,10 +299,10 @@ public:
     [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
-    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
-    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 INotEqual(const U32U64& lhs, const U32U64& rhs);
-    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed);
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index 8d46a0071..84bdb5739 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -70,7 +70,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicIAdd64:
     case Opcode::BufferAtomicISub32:
     case Opcode::BufferAtomicSMin32:
+    case Opcode::BufferAtomicSMin64:
     case Opcode::BufferAtomicUMin32:
+    case Opcode::BufferAtomicUMin64:
     case Opcode::BufferAtomicFMin32:
     case Opcode::BufferAtomicSMax32:
     case Opcode::BufferAtomicSMax64:
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 7fc514de9..008f44659 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -124,7 +124,9 @@ OPCODE(BufferAtomicIAdd32, U32, Opaq
 OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicSMin64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicUMin64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 )
@@ -382,14 +384,20 @@ OPCODE(ULessThan32, U1, U32,
 OPCODE(ULessThan64, U1, U64, U64, )
 OPCODE(IEqual32, U1, U32, U32, )
 OPCODE(IEqual64, U1, U64, U64, )
-OPCODE(SLessThanEqual, U1, U32, U32, )
-OPCODE(ULessThanEqual, U1, U32, U32, )
-OPCODE(SGreaterThan, U1, U32, U32, )
-OPCODE(UGreaterThan, U1, U32, U32, )
+OPCODE(SLessThanEqual32, U1, U32, U32, )
+OPCODE(SLessThanEqual64, U1, U64, U64, )
+OPCODE(ULessThanEqual32, U1, U32, U32, )
+OPCODE(ULessThanEqual64, U1, U64, U64, )
+OPCODE(SGreaterThan32, U1, U32, U32, )
+OPCODE(SGreaterThan64, U1, U64, U64, )
+OPCODE(UGreaterThan32, U1, U32, U32, )
+OPCODE(UGreaterThan64, U1, U64, U64, )
 OPCODE(INotEqual32, U1, U32, U32, )
 OPCODE(INotEqual64, U1, U64, U64, )
-OPCODE(SGreaterThanEqual, U1, U32, U32, )
-OPCODE(UGreaterThanEqual, U1, U32, U32, )
+OPCODE(SGreaterThanEqual32, U1, U32, U32, )
+OPCODE(SGreaterThanEqual64, U1, U64, U64, )
+OPCODE(UGreaterThanEqual32, U1, U32, U32, )
+OPCODE(UGreaterThanEqual64, U1, U64, U64, )
 
 // Logical operations
 OPCODE(LogicalOr, U1, U1, U1, )
diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
index 5c66b1115..2a39d3a2e 100644
--- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
@@ -381,24 +381,42 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::ULessThan64:
         FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; });
         return;
-    case IR::Opcode::SLessThanEqual:
+    case IR::Opcode::SLessThanEqual32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
         return;
-    case IR::Opcode::ULessThanEqual:
+    case IR::Opcode::SLessThanEqual64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a <= b; });
+        return;
+    case IR::Opcode::ULessThanEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
         return;
-    case IR::Opcode::SGreaterThan:
+    case IR::Opcode::ULessThanEqual64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a <= b; });
+        return;
+    case IR::Opcode::SGreaterThan32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
         return;
-    case IR::Opcode::UGreaterThan:
+    case IR::Opcode::SGreaterThan64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a > b; });
+        return;
+    case IR::Opcode::UGreaterThan32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
         return;
-    case IR::Opcode::SGreaterThanEqual:
+    case IR::Opcode::UGreaterThan64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a > b; });
+        return;
+    case IR::Opcode::SGreaterThanEqual32:
         FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
         return;
-    case IR::Opcode::UGreaterThanEqual:
+    case IR::Opcode::SGreaterThanEqual64:
+        FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a >= b; });
+        return;
+    case IR::Opcode::UGreaterThanEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
         return;
+    case IR::Opcode::UGreaterThanEqual64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a >= b; });
+        return;
     case IR::Opcode::IEqual32:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
         return;
diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h
index 57d36f6df..fdae9d3cf 100644
--- a/src/shader_recompiler/ir/passes/ir_passes.h
+++ b/src/shader_recompiler/ir/passes/ir_passes.h
@@ -19,7 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program);
 void FlattenExtendedUserdataPass(IR::Program& program);
 void ReadLaneEliminationPass(IR::Program& program);
 void ResourceTrackingPass(IR::Program& program);
-void CollectShaderInfoPass(IR::Program& program);
+void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
 void LowerBufferFormatToRaw(IR::Program& program);
 void LowerFp64ToFp32(IR::Program& program);
 void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index ffb785584..d5d140c93 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -20,7 +20,9 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicISub32:
     case IR::Opcode::BufferAtomicSMin32:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMin32:
+    case IR::Opcode::BufferAtomicUMin64:
     case IR::Opcode::BufferAtomicFMin32:
     case IR::Opcode::BufferAtomicSMax32:
     case IR::Opcode::BufferAtomicSMax64:
@@ -97,6 +99,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     case IR::Opcode::LoadBufferU64:
     case IR::Opcode::StoreBufferU64:
     case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
+    case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         return IR::Type::U64;
     case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::StoreBufferFormatF32:
@@ -118,6 +124,10 @@ u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
     case IR::Opcode::LoadBufferU64:
     case IR::Opcode::StoreBufferU64:
     case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
+    case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         return 3;
     case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::StoreBufferFormatF32: {
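The tracking pass now treats the 64-bit min/max atomics as 8-byte accesses (BufferDataType returns U64 and the address shift is 3), and the shader-info pass that follows flags uses_buffer_int64_atomics for them. That flag presumably maps to the Vulkan shaderBufferInt64Atomics feature, which backs the SPIR-V Int64Atomics capability these opcodes need. For orientation, the device-side capability query looks like this (illustration, not part of the patch):

#include <vulkan/vulkan.h>

// True if the device can back BufferAtomic*64 opcodes with native atomics.
bool SupportsBufferInt64Atomics(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceShaderAtomicInt64Features atomic_int64{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES,
    };
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &atomic_int64,
    };
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    return atomic_int64.shaderBufferInt64Atomics == VK_TRUE;
}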
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 59668870b..a87dceb0a 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/config.h"
 #include "shader_recompiler/ir/program.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 
@@ -102,7 +103,9 @@ void Visit(Info& info, const IR::Inst& inst) {
         break;
     case IR::Opcode::BufferAtomicIAdd64:
     case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicSMin64:
     case IR::Opcode::BufferAtomicUMax64:
+    case IR::Opcode::BufferAtomicUMin64:
         info.uses_buffer_int64_atomics = true;
         break;
     case IR::Opcode::LaneId:
@@ -136,7 +139,7 @@ void Visit(Info& info, const IR::Inst& inst) {
     }
 }
 
-void CollectShaderInfoPass(IR::Program& program) {
+void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
     auto& info = program.info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
             Visit(info, inst);
         }
     }
 
+    // In case Flatbuf has not already been bound by IR and is needed
+    // to query buffer sizes, bind it now.
+    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            // We can't guarantee that flatbuf will not grow past UBO
+            // limit if there are a lot of ReadConsts. (We could specialize)
+            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
+            .buffer_type = BufferType::Flatbuf,
+        });
+        // In the future we may want to read buffer sizes from GPU memory if available.
+        // info.readconst_types |= Info::ReadConstType::Immediate;
+    }
+
+    if (!Config::directMemoryAccess()) {
+        info.uses_dma = false;
+        info.readconst_types = Info::ReadConstType::None;
+    }
+
     if (info.uses_dma) {
         info.buffers.push_back({
             .used_types = IR::Type::U64,
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index e17fb1c9e..2da9e7b01 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -84,7 +84,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
-    Shader::Optimization::CollectShaderInfoPass(program);
+    Shader::Optimization::CollectShaderInfoPass(program, profile);
 
     Shader::IR::DumpProgram(program, info);
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 0aad0f047..514de1743 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -471,7 +471,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         uses_dma |= stage->uses_dma;
     }
 
-    if (uses_dma && !fault_process_pending) {
+    if (uses_dma) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c570ea368..4a978746c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -5,6 +5,7 @@
 #include <shared_mutex>
 #include "common/recursive_lock.h"
+#include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -122,7 +123,7 @@ private:
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     boost::icl::interval_set<VAddr> mapped_ranges;
-    std::shared_mutex mapped_ranges_mutex;
+    Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;
     boost::container::static_vector<