shader_recompiler: Emulate unnormalized sampler coordinates in shader. (#1762)

* shader_recompiler: Emulate unnormalized sampler coordinates in shader. * Address review comments.
2025-07-12 20:55:56 +00:00 · 2024-12-13 11:49:07 -08:00 · 2024-12-13 11:49:07 -08:00 · 028be3ba5d
commit 028be3ba5d
parent 306279901f
10 changed files with 78 additions and 12 deletions
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
    }
 }

+F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) { // Emits the IR division a / b.
+    if (a.Type() != b.Type()) { // Both operands must have the same type.
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) { // Pick the opcode matching the operand type.
+    case Type::F32:
+        return Inst<F32>(Opcode::FPDiv32, a, b);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPDiv64, a, b);
+    default: // Any type other than F32/F64 is rejected.
+        ThrowInvalidType(a.Type());
+    }
+}
+
 F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
    if (a.Type() != b.Type() || a.Type() != c.Type()) {
        UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@ -158,6 +158,7 @@ public:
    [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
    [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
    [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
+    [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b); // a / b, F32 or F64.
    [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);

    [[nodiscard]] F32F64 FPAbs(const F32F64& value);
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@ -184,6 +184,8 @@ OPCODE(FPMin32,                                             F32,            F32,
 OPCODE(FPMin64,                                             F64,            F64,            F64,                                                            )
 OPCODE(FPMul32,                                             F32,            F32,            F32,                                                            )
 OPCODE(FPMul64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPDiv32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPDiv64,                                             F64,            F64,            F64,                                                            )
 OPCODE(FPNeg32,                                             F32,            F32,                                                                            )
 OPCODE(FPNeg64,                                             F64,            F64,                                                                            )
 OPCODE(FPRecip32,                                           F32,            F32,                                                                            )
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -420,26 +420,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                 Descriptors& descriptors, const IR::Inst* producer,
                                 const u32 image_binding, const AmdGpu::Image& image) {
    // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
-    const u32 sampler_binding = [&] {
+    const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
        ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
        const IR::Value& handle = producer->Arg(1);
        // Inline sampler resource.
        if (handle.IsImmediate()) {
            LOG_WARNING(Render_Vulkan, "Inline sampler detected");
-            return descriptors.Add(SamplerResource{
+            const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
+            const auto binding = descriptors.Add(SamplerResource{
                .sharp_idx = std::numeric_limits<u32>::max(),
-                .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
+                .inline_sampler = inline_sampler,
            });
+            return {binding, inline_sampler}; // Binding index plus the sampler itself.
        }
        // Normal sampler resource.
        const auto ssharp_handle = handle.InstRecursive();
        const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
        const auto ssharp = TrackSharp(ssharp_ud, info);
-        return descriptors.Add(SamplerResource{
+        const auto binding = descriptors.Add(SamplerResource{
            .sharp_idx = ssharp,
            .associated_image = image_binding,
            .disable_aniso = disable_aniso,
        });
+        return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)}; // Binding plus sampler.
    }();

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -539,28 +542,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
        }
    }();

+    const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
+    // Query dimensions of image if needed for normalization.
+    // We can't use the image sharp because it could be bound to a different image later.
+    const auto dimensions =
+        unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
+                     : IR::Value{};
+    // Reads address operand `idx` via get_addr_reg; when sampling with unnormalized coordinates,
+    // divides it by image dimension `dim_idx` so the sampler receives a normalized coordinate.
+    const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
+        const auto coord = get_addr_reg(idx);
+        if (unnormalized) {
+            // The queried dimension is an unsigned integer, so convert it to float by value.
+            // A bitcast would reinterpret its bit pattern and produce a bogus divisor.
+            const auto dim = IR::U32{ir.CompositeExtract(dimensions, dim_idx)};
+            return ir.FPDiv(coord, ir.ConvertUToF(32, 32, dim));
+        }
+        return coord;
+    };
+
    // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
    const IR::Value coords = [&] -> IR::Value {
        switch (image.GetType()) {
        case AmdGpu::ImageType::Color1D: // x
            addr_reg = addr_reg + 1;
-            return get_addr_reg(addr_reg - 1);
+            return get_coord(addr_reg - 1, 0);
        case AmdGpu::ImageType::Color1DArray: // x, slice
            [[fallthrough]];
        case AmdGpu::ImageType::Color2D: // x, y
            addr_reg = addr_reg + 2;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
        case AmdGpu::ImageType::Color2DArray: // x, y, slice
            [[fallthrough]];
        case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
-            [[fallthrough]];
+            addr_reg = addr_reg + 3;
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_addr_reg(addr_reg - 1));
        case AmdGpu::ImageType::Color3D: // x, y, z
            addr_reg = addr_reg + 3;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
-                                         get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_coord(addr_reg - 1, 2));
        case AmdGpu::ImageType::Cube: // x, y, face
            addr_reg = addr_reg + 3;
-            return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+            return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
                                  get_addr_reg(addr_reg - 1), false, inst_info.is_array);
        default:
            UNREACHABLE();
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@ -40,7 +40,8 @@ union TextureInstInfo {
    BitField<6, 2, u32> gather_comp;
    BitField<8, 1, u32> has_derivatives;
    BitField<9, 1, u32> is_array;
-    BitField<10, 1, u32> is_gather;
+    BitField<10, 1, u32> is_unnormalized;
+    BitField<11, 1, u32> is_gather;
 };

 union BufferInstInfo {