mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-25 21:03:18 +00:00
shader_recompiler: Add swizzle support for unsupported formats. (#1869)
* shader_recompiler: Add swizzle support for unsupported formats. * renderer_vulkan: Rework MRT swizzles and add unsupported format swizzle support. * shader_recompiler: Clean up swizzle handling and handle ImageRead storage swizzle. * shader_recompiler: Fix type errors * liverpool_to_vk: Remove redundant clear color swizzles. * shader_recompiler: Reduce CompositeConstruct to constants where possible. * shader_recompiler: Fix ImageRead/Write and StoreBufferFormatF32 types. * amdgpu: Add a few more unsupported format remaps.
This commit is contained in:
parent
284f473a52
commit
41d64a200d
22 changed files with 522 additions and 282 deletions
|
@ -663,6 +663,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_
|
|||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
|
||||
size_t comp1) {
|
||||
if (vector1.Type() != vector2.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
|
||||
}
|
||||
if (comp0 >= 4 || comp1 >= 4) {
|
||||
UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1);
|
||||
}
|
||||
const auto shuffle{[&](Opcode opcode) -> Value {
|
||||
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
|
||||
Value{static_cast<u32>(comp1)});
|
||||
}};
|
||||
switch (vector1.Type()) {
|
||||
case Type::U32x4:
|
||||
return shuffle(Opcode::CompositeShuffleU32x2);
|
||||
case Type::F16x4:
|
||||
return shuffle(Opcode::CompositeShuffleF16x2);
|
||||
case Type::F32x4:
|
||||
return shuffle(Opcode::CompositeShuffleF32x2);
|
||||
case Type::F64x4:
|
||||
return shuffle(Opcode::CompositeShuffleF64x2);
|
||||
default:
|
||||
ThrowInvalidType(vector1.Type());
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
|
||||
size_t comp1, size_t comp2) {
|
||||
if (vector1.Type() != vector2.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
|
||||
}
|
||||
if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) {
|
||||
UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2);
|
||||
}
|
||||
const auto shuffle{[&](Opcode opcode) -> Value {
|
||||
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
|
||||
Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)});
|
||||
}};
|
||||
switch (vector1.Type()) {
|
||||
case Type::U32x4:
|
||||
return shuffle(Opcode::CompositeShuffleU32x3);
|
||||
case Type::F16x4:
|
||||
return shuffle(Opcode::CompositeShuffleF16x3);
|
||||
case Type::F32x4:
|
||||
return shuffle(Opcode::CompositeShuffleF32x3);
|
||||
case Type::F64x4:
|
||||
return shuffle(Opcode::CompositeShuffleF64x3);
|
||||
default:
|
||||
ThrowInvalidType(vector1.Type());
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
|
||||
size_t comp1, size_t comp2, size_t comp3) {
|
||||
if (vector1.Type() != vector2.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
|
||||
}
|
||||
if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) {
|
||||
UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2,
|
||||
comp3);
|
||||
}
|
||||
const auto shuffle{[&](Opcode opcode) -> Value {
|
||||
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
|
||||
Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)},
|
||||
Value{static_cast<u32>(comp3)});
|
||||
}};
|
||||
switch (vector1.Type()) {
|
||||
case Type::U32x4:
|
||||
return shuffle(Opcode::CompositeShuffleU32x4);
|
||||
case Type::F16x4:
|
||||
return shuffle(Opcode::CompositeShuffleF16x4);
|
||||
case Type::F32x4:
|
||||
return shuffle(Opcode::CompositeShuffleF32x4);
|
||||
case Type::F64x4:
|
||||
return shuffle(Opcode::CompositeShuffleF64x4);
|
||||
default:
|
||||
ThrowInvalidType(vector1.Type());
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
|
||||
if (true_value.Type() != false_value.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type());
|
||||
|
|
|
@ -155,6 +155,13 @@ public:
|
|||
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
||||
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
||||
|
||||
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
|
||||
size_t comp1);
|
||||
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
|
||||
size_t comp1, size_t comp2);
|
||||
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
|
||||
size_t comp1, size_t comp2, size_t comp3);
|
||||
|
||||
[[nodiscard]] Value Select(const U1& condition, const Value& true_value,
|
||||
const Value& false_value);
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ OPCODE(StoreBufferU32, Void, Opaq
|
|||
OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, )
|
||||
OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, )
|
||||
OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, )
|
||||
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, )
|
||||
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
|
||||
|
||||
// Buffer atomic operations
|
||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
||||
|
@ -124,6 +124,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x
|
|||
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
|
||||
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
|
||||
OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, )
|
||||
OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, )
|
||||
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
|
||||
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
|
||||
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
|
||||
|
@ -133,6 +136,9 @@ OPCODE(CompositeExtractF16x4, F16, F16x
|
|||
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
|
||||
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
|
||||
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
|
||||
OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, )
|
||||
OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, )
|
||||
OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, )
|
||||
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
|
||||
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
|
||||
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
|
||||
|
@ -142,6 +148,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x
|
|||
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
|
||||
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
|
||||
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
|
||||
OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, )
|
||||
OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, )
|
||||
OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, )
|
||||
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
|
||||
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
|
||||
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
|
||||
|
@ -151,6 +160,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x
|
|||
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
|
||||
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
|
||||
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
|
||||
OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, )
|
||||
OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, )
|
||||
OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, )
|
||||
|
||||
// Select operations
|
||||
OPCODE(SelectU1, U1, U1, U1, U1, )
|
||||
|
@ -346,8 +358,8 @@ OPCODE(ImageGatherDref, F32x4, Opaq
|
|||
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
||||
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
||||
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, U32, )
|
||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, U32x4, )
|
||||
OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, )
|
||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, )
|
||||
|
||||
// Image atomic operations
|
||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/reinterpret.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
@ -128,35 +129,6 @@ bool IsImageInstruction(const IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) {
|
||||
boost::container::static_vector<IR::Value, 4> comps;
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
switch (sharp.GetSwizzle(i)) {
|
||||
case AmdGpu::CompSwizzle::Zero:
|
||||
comps.emplace_back(ir.Imm32(0.f));
|
||||
break;
|
||||
case AmdGpu::CompSwizzle::One:
|
||||
comps.emplace_back(ir.Imm32(1.f));
|
||||
break;
|
||||
case AmdGpu::CompSwizzle::Red:
|
||||
comps.emplace_back(ir.CompositeExtract(texel, 0));
|
||||
break;
|
||||
case AmdGpu::CompSwizzle::Green:
|
||||
comps.emplace_back(ir.CompositeExtract(texel, 1));
|
||||
break;
|
||||
case AmdGpu::CompSwizzle::Blue:
|
||||
comps.emplace_back(ir.CompositeExtract(texel, 2));
|
||||
break;
|
||||
case AmdGpu::CompSwizzle::Alpha:
|
||||
comps.emplace_back(ir.CompositeExtract(texel, 3));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
|
||||
};
|
||||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(Info& info_)
|
||||
|
@ -409,15 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
|
||||
// Apply dst_sel swizzle on formatted buffer instructions
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2)));
|
||||
} else {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel));
|
||||
}
|
||||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
|
@ -765,10 +728,6 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(4, SwizzleVector(ir, image, inst.Arg(4)));
|
||||
}
|
||||
|
||||
if (inst_info.has_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||
|
@ -783,6 +742,50 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
}
|
||||
|
||||
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto binding = inst.Arg(0).U32();
|
||||
const auto buffer_res = info.texture_buffers[binding];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
if (!buffer.Valid()) {
|
||||
// Don't need to swizzle invalid buffer.
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect()));
|
||||
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
|
||||
inst.ReplaceUsesWith(swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto binding = inst.Arg(0).U32();
|
||||
const auto image_res = info.images[binding & 0xFFFF];
|
||||
const auto image = image_res.GetSharp(info);
|
||||
if (!image.Valid() || !image_res.IsStorage(image)) {
|
||||
// Don't need to swizzle invalid or non-storage image.
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect()));
|
||||
} else if (inst.GetOpcode() == IR::Opcode::ImageRead) {
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const auto lod = inst.Arg(2);
|
||||
const auto ms = inst.Arg(3);
|
||||
const auto texel =
|
||||
ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod},
|
||||
ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
inst.ReplaceUsesWith(swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
|
@ -852,6 +855,19 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Second pass to reinterpret format read/write where needed, since we now know
|
||||
// the bindings and their properties.
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInterpretation(*block, inst, info);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInterpretation(*block, inst, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
24
src/shader_recompiler/ir/reinterpret.h
Normal file
24
src/shader_recompiler/ir/reinterpret.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
/// Applies a component swizzle to a vec4.
|
||||
inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::CompMapping& swizzle) {
|
||||
// Constants are indexed as 0 and 1, and components are 4-7. Thus we can apply a swizzle
|
||||
// using two vectors and a shuffle, using one vector of constants and one of the components.
|
||||
const auto zero = ir.Imm32(0.f);
|
||||
const auto one = ir.Imm32(1.f);
|
||||
const auto constants_vec = ir.CompositeConstruct(zero, one, zero, zero);
|
||||
const auto swizzled =
|
||||
ir.CompositeShuffle(constants_vec, vector, size_t(swizzle.r), size_t(swizzle.g),
|
||||
size_t(swizzle.b), size_t(swizzle.a));
|
||||
return swizzled;
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
Loading…
Add table
Add a link
Reference in a new issue