Mirror of https://github.com/shadps4-emu/shadPS4.git
shader_recompiler: Replace texel buffers with in-shader buffer format interpretation (#2363)
* shader_recompiler: Replace texel buffers with in-shader buffer format interpretation
* shader_recompiler: Move 10/11-bit float conversion to functions and address some comments.
* vulkan: Remove VK_KHR_maintenance5 as it is no longer needed for buffer views.
* shader_recompiler: Add helpers for composites and bitfields in pack/unpack.
* shader_recompiler: Use initializer_list for bitfield insert helper.
Parent: 78b4f10cc6
Commit: cfe249debe
35 changed files with 1037 additions and 562 deletions
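At its core, the change replaces Vulkan texel buffer views (whose fixed-function hardware decoded formats such as Unorm on load) with raw integer loads followed by arithmetic conversion emitted directly into the shader. A minimal standalone sketch of the conversion such a lowered load performs for a Format8_8_8_8/Unorm buffer (plain C++ standing in for emitted shader code; the helper name is illustrative):

#include <array>
#include <cstdint>

// Illustrative model of the in-shader work that replaces a texel buffer
// view for a Format8_8_8_8 / Unorm load: fetch one raw dword, then unpack
// each byte to a normalized float in [0, 1].
std::array<float, 4> UnpackUnorm4x8(uint32_t raw) {
    std::array<float, 4> result;
    for (int i = 0; i < 4; ++i) {
        const uint32_t byte = (raw >> (i * 8)) & 0xFFu;
        result[i] = static_cast<float>(byte) / 255.0f;
    }
    return result;
}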
@@ -370,8 +370,16 @@ U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
     return Inst<U32>(Opcode::ReadConstBuffer, handle, index);
 }
 
-Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
-                            BufferInstInfo info) {
+U32 IREmitter::LoadBufferU8(const Value& handle, const Value& address, BufferInstInfo info) {
+    return Inst<U32>(Opcode::LoadBufferU8, Flags{info}, handle, address);
+}
+
+U32 IREmitter::LoadBufferU16(const Value& handle, const Value& address, BufferInstInfo info) {
+    return Inst<U32>(Opcode::LoadBufferU16, Flags{info}, handle, address);
+}
+
+Value IREmitter::LoadBufferU32(int num_dwords, const Value& handle, const Value& address,
+                               BufferInstInfo info) {
     switch (num_dwords) {
     case 1:
         return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address);
@@ -386,12 +394,38 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
     }
 }
 
+Value IREmitter::LoadBufferF32(int num_dwords, const Value& handle, const Value& address,
+                               BufferInstInfo info) {
+    switch (num_dwords) {
+    case 1:
+        return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address);
+    case 2:
+        return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address);
+    case 3:
+        return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address);
+    case 4:
+        return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address);
+    default:
+        UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
+    }
+}
+
 Value IREmitter::LoadBufferFormat(const Value& handle, const Value& address, BufferInstInfo info) {
     return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address);
 }
 
-void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address,
-                            const Value& data, BufferInstInfo info) {
+void IREmitter::StoreBufferU8(const Value& handle, const Value& address, const U32& data,
+                              BufferInstInfo info) {
+    Inst(Opcode::StoreBufferU8, Flags{info}, handle, address, data);
+}
+
+void IREmitter::StoreBufferU16(const Value& handle, const Value& address, const U32& data,
+                               BufferInstInfo info) {
+    Inst(Opcode::StoreBufferU16, Flags{info}, handle, address, data);
+}
+
+void IREmitter::StoreBufferU32(int num_dwords, const Value& handle, const Value& address,
+                               const Value& data, BufferInstInfo info) {
     switch (num_dwords) {
     case 1:
         Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data);
@@ -410,6 +444,31 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
     }
 }
 
+void IREmitter::StoreBufferF32(int num_dwords, const Value& handle, const Value& address,
+                               const Value& data, BufferInstInfo info) {
+    switch (num_dwords) {
+    case 1:
+        Inst(Opcode::StoreBufferF32, Flags{info}, handle, address, data);
+        break;
+    case 2:
+        Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data);
+        break;
+    case 3:
+        Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data);
+        break;
+    case 4:
+        Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data);
+        break;
+    default:
+        UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
+    }
+}
+
+void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
+                                  BufferInstInfo info) {
+    Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
+}
+
 Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
                                   BufferInstInfo info) {
     return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
@@ -457,11 +516,6 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
     return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
 }
 
-void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
-                                  BufferInstInfo info) {
-    Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
-}
-
 U32 IREmitter::DataAppend(const U32& counter) {
     return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
 }
@@ -527,10 +581,14 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
     switch (e1.Type()) {
     case Type::U32:
         return Inst(Opcode::CompositeConstructU32x2, e1, e2);
+    case Type::U32x2:
+        return Inst(Opcode::CompositeConstructU32x2x2, e1, e2);
     case Type::F16:
         return Inst(Opcode::CompositeConstructF16x2, e1, e2);
     case Type::F32:
         return Inst(Opcode::CompositeConstructF32x2, e1, e2);
+    case Type::F32x2:
+        return Inst(Opcode::CompositeConstructF32x2x2, e1, e2);
     case Type::F64:
         return Inst(Opcode::CompositeConstructF64x2, e1, e2);
     default:
@@ -779,52 +837,116 @@ F64 IREmitter::PackFloat2x32(const Value& vector) {
     return Inst<F64>(Opcode::PackFloat2x32, vector);
 }
 
-U32 IREmitter::PackFloat2x16(const Value& vector) {
-    return Inst<U32>(Opcode::PackFloat2x16, vector);
+U32 IREmitter::Pack2x16(const AmdGpu::NumberFormat number_format, const Value& vector) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Unorm:
+        return Inst<U32>(Opcode::PackUnorm2x16, vector);
+    case AmdGpu::NumberFormat::Snorm:
+        return Inst<U32>(Opcode::PackSnorm2x16, vector);
+    case AmdGpu::NumberFormat::Uint:
+        return Inst<U32>(Opcode::PackUint2x16, vector);
+    case AmdGpu::NumberFormat::Sint:
+        return Inst<U32>(Opcode::PackSint2x16, vector);
+    case AmdGpu::NumberFormat::Float:
+        return Inst<U32>(Opcode::PackHalf2x16, vector);
+    default:
+        UNREACHABLE_MSG("Unsupported 2x16 number format: {}", number_format);
+    }
 }
 
-Value IREmitter::UnpackFloat2x16(const U32& value) {
-    return Inst(Opcode::UnpackFloat2x16, value);
+Value IREmitter::Unpack2x16(const AmdGpu::NumberFormat number_format, const U32& value) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Unorm:
+        return Inst(Opcode::UnpackUnorm2x16, value);
+    case AmdGpu::NumberFormat::Snorm:
+        return Inst(Opcode::UnpackSnorm2x16, value);
+    case AmdGpu::NumberFormat::Uint:
+        return Inst(Opcode::UnpackUint2x16, value);
+    case AmdGpu::NumberFormat::Sint:
+        return Inst(Opcode::UnpackSint2x16, value);
+    case AmdGpu::NumberFormat::Float:
+        return Inst(Opcode::UnpackHalf2x16, value);
+    default:
+        UNREACHABLE_MSG("Unsupported 2x16 number format: {}", number_format);
+    }
 }
 
-U32 IREmitter::PackHalf2x16(const Value& vector) {
-    return Inst<U32>(Opcode::PackHalf2x16, vector);
+U32 IREmitter::Pack4x8(const AmdGpu::NumberFormat number_format, const Value& vector) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Unorm:
+        return Inst<U32>(Opcode::PackUnorm4x8, vector);
+    case AmdGpu::NumberFormat::Snorm:
+        return Inst<U32>(Opcode::PackSnorm4x8, vector);
+    case AmdGpu::NumberFormat::Uint:
+        return Inst<U32>(Opcode::PackUint4x8, vector);
+    case AmdGpu::NumberFormat::Sint:
+        return Inst<U32>(Opcode::PackSint4x8, vector);
+    default:
+        UNREACHABLE_MSG("Unsupported 4x8 number format: {}", number_format);
+    }
 }
 
-Value IREmitter::UnpackHalf2x16(const U32& value) {
-    return Inst(Opcode::UnpackHalf2x16, value);
+Value IREmitter::Unpack4x8(const AmdGpu::NumberFormat number_format, const U32& value) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Unorm:
+        return Inst(Opcode::UnpackUnorm4x8, value);
+    case AmdGpu::NumberFormat::Snorm:
+        return Inst(Opcode::UnpackSnorm4x8, value);
+    case AmdGpu::NumberFormat::Uint:
+        return Inst(Opcode::UnpackUint4x8, value);
+    case AmdGpu::NumberFormat::Sint:
+        return Inst(Opcode::UnpackSint4x8, value);
+    default:
+        UNREACHABLE_MSG("Unsupported 4x8 number format: {}", number_format);
+    }
 }
 
-U32 IREmitter::PackUnorm2x16(const Value& vector) {
-    return Inst<U32>(Opcode::PackUnorm2x16, vector);
+U32 IREmitter::Pack10_11_11(const AmdGpu::NumberFormat number_format, const Value& vector) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Float:
+        return Inst<U32>(Opcode::PackUfloat10_11_11, vector);
+    default:
+        UNREACHABLE_MSG("Unsupported 10_11_11 number format: {}", number_format);
+    }
 }
 
-Value IREmitter::UnpackUnorm2x16(const U32& value) {
-    return Inst(Opcode::UnpackUnorm2x16, value);
+U32 IREmitter::Pack2_10_10_10(const AmdGpu::NumberFormat number_format, const Value& vector) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Unorm:
+        return Inst<U32>(Opcode::PackUnorm2_10_10_10, vector);
+    case AmdGpu::NumberFormat::Snorm:
+        return Inst<U32>(Opcode::PackSnorm2_10_10_10, vector);
+    case AmdGpu::NumberFormat::Uint:
+        return Inst<U32>(Opcode::PackUint2_10_10_10, vector);
+    case AmdGpu::NumberFormat::Sint:
+        return Inst<U32>(Opcode::PackSint2_10_10_10, vector);
+    default:
+        UNREACHABLE_MSG("Unsupported 2_10_10_10 number format: {}", number_format);
+    }
 }
 
-U32 IREmitter::PackSnorm2x16(const Value& vector) {
-    return Inst<U32>(Opcode::PackSnorm2x16, vector);
+Value IREmitter::Unpack2_10_10_10(const AmdGpu::NumberFormat number_format, const U32& value) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Unorm:
+        return Inst(Opcode::UnpackUnorm2_10_10_10, value);
+    case AmdGpu::NumberFormat::Snorm:
+        return Inst(Opcode::UnpackSnorm2_10_10_10, value);
+    case AmdGpu::NumberFormat::Uint:
+        return Inst(Opcode::UnpackUint2_10_10_10, value);
+    case AmdGpu::NumberFormat::Sint:
+        return Inst(Opcode::UnpackSint2_10_10_10, value);
+    default:
+        UNREACHABLE_MSG("Unsupported 2_10_10_10 number format: {}", number_format);
+    }
 }
 
-Value IREmitter::UnpackSnorm2x16(const U32& value) {
-    return Inst(Opcode::UnpackSnorm2x16, value);
-}
-
-U32 IREmitter::PackUint2x16(const Value& value) {
-    return Inst<U32>(Opcode::PackUint2x16, value);
-}
-
-Value IREmitter::UnpackUint2x16(const U32& value) {
-    return Inst(Opcode::UnpackUint2x16, value);
-}
-
-U32 IREmitter::PackSint2x16(const Value& value) {
-    return Inst<U32>(Opcode::PackSint2x16, value);
-}
-
-Value IREmitter::UnpackSint2x16(const U32& value) {
-    return Inst(Opcode::UnpackSint2x16, value);
+Value IREmitter::Unpack10_11_11(const AmdGpu::NumberFormat number_format, const U32& value) {
+    switch (number_format) {
+    case AmdGpu::NumberFormat::Float:
+        return Inst(Opcode::UnpackUfloat10_11_11, value);
+    default:
+        UNREACHABLE_MSG("Unsupported 10_11_11 number format: {}", number_format);
+    }
 }
 
 F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
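The Float case of Pack10_11_11/Unpack10_11_11 above covers the packed B10G11R11 format, whose channels are unsigned floats: a 5-bit exponent and 6-bit mantissa for the 11-bit channels, with no sign bit. A sketch of the assumed decode semantics for one 11-bit channel (not the recompiler's actual helper; the all-ones-exponent Inf/NaN case is omitted):

#include <cmath>
#include <cstdint>

// Decode an 11-bit unsigned float (6-bit mantissa, 5-bit exponent, bias 15).
// Denormals (exponent == 0) decode as mantissa * 2^-20.
float DecodeUfloat11(uint32_t bits) {
    const uint32_t exponent = (bits >> 6) & 0x1Fu;
    const uint32_t mantissa = bits & 0x3Fu;
    if (exponent == 0) {
        return std::ldexp(static_cast<float>(mantissa), -20);
    }
    return std::ldexp(1.0f + static_cast<float>(mantissa) / 64.0f,
                      static_cast<int>(exponent) - 15);
}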
@@ -109,12 +109,22 @@ public:
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
     [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
 
-    [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
-                                   BufferInstInfo info);
+    [[nodiscard]] U32 LoadBufferU8(const Value& handle, const Value& address, BufferInstInfo info);
+    [[nodiscard]] U32 LoadBufferU16(const Value& handle, const Value& address, BufferInstInfo info);
+    [[nodiscard]] Value LoadBufferU32(int num_dwords, const Value& handle, const Value& address,
+                                      BufferInstInfo info);
+    [[nodiscard]] Value LoadBufferF32(int num_dwords, const Value& handle, const Value& address,
+                                      BufferInstInfo info);
     [[nodiscard]] Value LoadBufferFormat(const Value& handle, const Value& address,
                                          BufferInstInfo info);
-    void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
-                     BufferInstInfo info);
+    void StoreBufferU8(const Value& handle, const Value& address, const U32& data,
+                       BufferInstInfo info);
+    void StoreBufferU16(const Value& handle, const Value& address, const U32& data,
+                        BufferInstInfo info);
+    void StoreBufferU32(int num_dwords, const Value& handle, const Value& address,
+                        const Value& data, BufferInstInfo info);
+    void StoreBufferF32(int num_dwords, const Value& handle, const Value& address,
+                        const Value& data, BufferInstInfo info);
     void StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
                            BufferInstInfo info);
@@ -167,22 +177,19 @@ public:
 
     [[nodiscard]] U64 PackUint2x32(const Value& vector);
     [[nodiscard]] Value UnpackUint2x32(const U64& value);
 
     [[nodiscard]] F64 PackFloat2x32(const Value& vector);
 
-    [[nodiscard]] U32 PackFloat2x16(const Value& vector);
-    [[nodiscard]] Value UnpackFloat2x16(const U32& value);
+    [[nodiscard]] U32 Pack2x16(AmdGpu::NumberFormat number_format, const Value& vector);
+    [[nodiscard]] Value Unpack2x16(AmdGpu::NumberFormat number_format, const U32& value);
 
-    [[nodiscard]] U32 PackHalf2x16(const Value& vector);
-    [[nodiscard]] Value UnpackHalf2x16(const U32& value);
-    [[nodiscard]] U32 PackUnorm2x16(const Value& vector);
-    [[nodiscard]] Value UnpackUnorm2x16(const U32& value);
-    [[nodiscard]] U32 PackSnorm2x16(const Value& vector);
-    [[nodiscard]] Value UnpackSnorm2x16(const U32& value);
-    [[nodiscard]] U32 PackUint2x16(const Value& value);
-    [[nodiscard]] Value UnpackUint2x16(const U32& value);
-    [[nodiscard]] U32 PackSint2x16(const Value& value);
-    [[nodiscard]] Value UnpackSint2x16(const U32& value);
+    [[nodiscard]] U32 Pack4x8(AmdGpu::NumberFormat number_format, const Value& vector);
+    [[nodiscard]] Value Unpack4x8(AmdGpu::NumberFormat number_format, const U32& value);
+
+    [[nodiscard]] U32 Pack10_11_11(AmdGpu::NumberFormat number_format, const Value& vector);
+    [[nodiscard]] Value Unpack10_11_11(AmdGpu::NumberFormat number_format, const U32& value);
+
+    [[nodiscard]] U32 Pack2_10_10_10(AmdGpu::NumberFormat number_format, const Value& vector);
+    [[nodiscard]] Value Unpack2_10_10_10(AmdGpu::NumberFormat number_format, const U32& value);
 
     [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
@@ -54,10 +54,16 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::SetAttribute:
     case Opcode::SetTcsGenericAttribute:
    case Opcode::SetPatch:
+    case Opcode::StoreBufferU8:
+    case Opcode::StoreBufferU16:
     case Opcode::StoreBufferU32:
     case Opcode::StoreBufferU32x2:
     case Opcode::StoreBufferU32x3:
     case Opcode::StoreBufferU32x4:
+    case Opcode::StoreBufferF32:
+    case Opcode::StoreBufferF32x2:
+    case Opcode::StoreBufferF32x3:
+    case Opcode::StoreBufferF32x4:
     case Opcode::StoreBufferFormatF32:
     case Opcode::BufferAtomicIAdd32:
     case Opcode::BufferAtomicSMin32:
@@ -90,15 +90,27 @@ OPCODE(UndefU32, U32, )
 OPCODE(UndefU64, U64, )
 
 // Buffer operations
+OPCODE(LoadBufferU8, U32, Opaque, Opaque, )
+OPCODE(LoadBufferU16, U32, Opaque, Opaque, )
 OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
 OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, )
 OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, )
 OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, )
+OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
+OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
+OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
+OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
 OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
+OPCODE(StoreBufferU8, Void, Opaque, Opaque, U32, )
+OPCODE(StoreBufferU16, Void, Opaque, Opaque, U32, )
 OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
 OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, )
 OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, )
 OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, )
+OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
+OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
+OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
+OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
 OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
 
 // Buffer atomic operations
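Entries in this table are consumed as an X-macro: each including file defines OPCODE to expand every entry into whatever table it needs (name strings, result types, argument counts). A minimal sketch of the pattern with a hypothetical consumer and abbreviated entries:

#include <cstdio>

// Hypothetical consumer of an opcodes.inc-style X-macro list: define OPCODE
// to turn each entry into its name string, list the entries, then undef.
#define OPCODE(name, result, ...) #name,
const char* const kOpcodeNames[] = {
    OPCODE(LoadBufferU8, U32, Opaque, Opaque)
    OPCODE(StoreBufferU8, Void, Opaque, Opaque, U32)
};
#undef OPCODE

int main() {
    for (const char* name : kOpcodeNames) {
        std::puts(name); // prints LoadBufferU8, then StoreBufferU8
    }
}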
@@ -118,6 +130,7 @@ OPCODE(BufferAtomicSwap32, U32, Opaq
 OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
 OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
 OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
+OPCODE(CompositeConstructU32x2x2, U32x4, U32x2, U32x2, )
 OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
 OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
 OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
@@ -142,6 +155,7 @@ OPCODE(CompositeShuffleF16x4, F16x4, F16x
 OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
 OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
 OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
+OPCODE(CompositeConstructF32x2x2, F32x4, F32x2, F32x2, )
 OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
 OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
 OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
@@ -180,21 +194,42 @@ OPCODE(BitCastU64F64, U64, F64,
 OPCODE(BitCastF16U16, F16, U16, )
 OPCODE(BitCastF32U32, F32, U32, )
 OPCODE(BitCastF64U64, F64, U64, )
 
 OPCODE(PackUint2x32, U64, U32x2, )
 OPCODE(UnpackUint2x32, U32x2, U64, )
 OPCODE(PackFloat2x32, F64, F32x2, )
-OPCODE(PackFloat2x16, U32, F16x2, )
-OPCODE(UnpackFloat2x16, F16x2, U32, )
-OPCODE(PackHalf2x16, U32, F32x2, )
-OPCODE(UnpackHalf2x16, F32x2, U32, )
 
 OPCODE(PackUnorm2x16, U32, F32x2, )
 OPCODE(UnpackUnorm2x16, F32x2, U32, )
 OPCODE(PackSnorm2x16, U32, F32x2, )
 OPCODE(UnpackSnorm2x16, F32x2, U32, )
-OPCODE(PackUint2x16, U32, U32x2, )
-OPCODE(UnpackUint2x16, U32x2, U32, )
-OPCODE(PackSint2x16, U32, U32x2, )
-OPCODE(UnpackSint2x16, U32x2, U32, )
+OPCODE(PackUint2x16, U32, F32x2, )
+OPCODE(UnpackUint2x16, F32x2, U32, )
+OPCODE(PackSint2x16, U32, F32x2, )
+OPCODE(UnpackSint2x16, F32x2, U32, )
+OPCODE(PackHalf2x16, U32, F32x2, )
+OPCODE(UnpackHalf2x16, F32x2, U32, )
+
+OPCODE(PackUnorm4x8, U32, F32x4, )
+OPCODE(UnpackUnorm4x8, F32x4, U32, )
+OPCODE(PackSnorm4x8, U32, F32x4, )
+OPCODE(UnpackSnorm4x8, F32x4, U32, )
+OPCODE(PackUint4x8, U32, F32x4, )
+OPCODE(UnpackUint4x8, F32x4, U32, )
+OPCODE(PackSint4x8, U32, F32x4, )
+OPCODE(UnpackSint4x8, F32x4, U32, )
+
+OPCODE(PackUfloat10_11_11, U32, F32x3, )
+OPCODE(UnpackUfloat10_11_11, F32x3, U32, )
+
+OPCODE(PackUnorm2_10_10_10, U32, F32x4, )
+OPCODE(UnpackUnorm2_10_10_10, F32x4, U32, )
+OPCODE(PackSnorm2_10_10_10, U32, F32x4, )
+OPCODE(UnpackSnorm2_10_10_10, F32x4, U32, )
+OPCODE(PackUint2_10_10_10, U32, F32x4, )
+OPCODE(UnpackUint2_10_10_10, F32x4, U32, )
+OPCODE(PackSint2_10_10_10, U32, F32x4, )
+OPCODE(UnpackSint2_10_10_10, F32x4, U32, )
 
 // Floating-point operations
 OPCODE(FPAbs32, F32, F32, )
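The fixed-point pack opcodes above boil down to clamping, scaling, and bitfield insertion. A hedged sketch of the arithmetic PackUnorm2_10_10_10 is assumed to perform (illustrative C++, not the emitted SPIR-V; rounding simplified to round-half-up):

#include <algorithm>
#include <cstdint>

// Assumed lowering of PackUnorm2_10_10_10: clamp each channel to [0, 1],
// scale x/y/z to 10 bits and w to 2 bits, round, then insert the bitfields.
uint32_t PackUnorm2_10_10_10(float x, float y, float z, float w) {
    const auto quantize = [](float v, uint32_t max) {
        v = std::clamp(v, 0.0f, 1.0f);
        return static_cast<uint32_t>(v * static_cast<float>(max) + 0.5f);
    };
    return quantize(x, 1023) | (quantize(y, 1023) << 10) |
           (quantize(z, 1023) << 20) | (quantize(w, 3) << 30);
}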
@@ -340,14 +340,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
     case IR::Opcode::BitCastU32F32:
         return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
-    case IR::Opcode::PackHalf2x16:
-        return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
-    case IR::Opcode::UnpackHalf2x16:
-        return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
-    case IR::Opcode::PackFloat2x16:
-        return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
-    case IR::Opcode::UnpackFloat2x16:
-        return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
+    // 2x16
     case IR::Opcode::PackUnorm2x16:
         return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm2x16);
     case IR::Opcode::UnpackUnorm2x16:
@@ -364,6 +357,49 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         return FoldInverseFunc(inst, IR::Opcode::UnpackSint2x16);
     case IR::Opcode::UnpackSint2x16:
         return FoldInverseFunc(inst, IR::Opcode::PackSint2x16);
+    case IR::Opcode::PackHalf2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
+    case IR::Opcode::UnpackHalf2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
+    // 4x8
+    case IR::Opcode::PackUnorm4x8:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm4x8);
+    case IR::Opcode::UnpackUnorm4x8:
+        return FoldInverseFunc(inst, IR::Opcode::PackUnorm4x8);
+    case IR::Opcode::PackSnorm4x8:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSnorm4x8);
+    case IR::Opcode::UnpackSnorm4x8:
+        return FoldInverseFunc(inst, IR::Opcode::PackSnorm4x8);
+    case IR::Opcode::PackUint4x8:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUint4x8);
+    case IR::Opcode::UnpackUint4x8:
+        return FoldInverseFunc(inst, IR::Opcode::PackUint4x8);
+    case IR::Opcode::PackSint4x8:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSint4x8);
+    case IR::Opcode::UnpackSint4x8:
+        return FoldInverseFunc(inst, IR::Opcode::PackSint4x8);
+    // 10_11_11
+    case IR::Opcode::PackUfloat10_11_11:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUfloat10_11_11);
+    case IR::Opcode::UnpackUfloat10_11_11:
+        return FoldInverseFunc(inst, IR::Opcode::PackUfloat10_11_11);
+    // 2_10_10_10
+    case IR::Opcode::PackUnorm2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm2_10_10_10);
+    case IR::Opcode::UnpackUnorm2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::PackUnorm2_10_10_10);
+    case IR::Opcode::PackSnorm2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSnorm2_10_10_10);
+    case IR::Opcode::UnpackSnorm2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::PackSnorm2_10_10_10);
+    case IR::Opcode::PackUint2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUint2_10_10_10);
+    case IR::Opcode::UnpackUint2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::PackUint2_10_10_10);
+    case IR::Opcode::PackSint2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSint2_10_10_10);
+    case IR::Opcode::UnpackSint2_10_10_10:
+        return FoldInverseFunc(inst, IR::Opcode::PackSint2_10_10_10);
     case IR::Opcode::SelectU1:
     case IR::Opcode::SelectU8:
     case IR::Opcode::SelectU16:
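These folds depend only on pack/unpack opcodes being registered as inverses of one another. A simplified model of the FoldInverseFunc pattern (hypothetical node types, not the recompiler's actual IR classes):

#include <optional>

// If `inst`'s operand was produced by the named inverse opcode, the whole
// chain collapses to that inverse's own operand and both conversions die.
enum class Op { PackUnorm2x16, UnpackUnorm2x16, Other };

struct Node {
    Op op;
    const Node* arg = nullptr;
};

std::optional<const Node*> FoldInverse(const Node& inst, Op inverse) {
    if (inst.arg != nullptr && inst.arg->op == inverse) {
        return inst.arg->arg; // forward the value that was originally packed
    }
    return std::nullopt;
}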
@@ -19,6 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program);
 void FlattenExtendedUserdataPass(IR::Program& program);
 void ResourceTrackingPass(IR::Program& program);
 void CollectShaderInfoPass(IR::Program& program);
+void LowerBufferFormatToRaw(IR::Program& program);
 void LowerSharedMemToRegisters(IR::Program& program);
 void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
                            Stage stage);
src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp (new file, 211 lines)
@@ -0,0 +1,211 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "video_core/amdgpu/resource.h"

namespace Shader::Optimization {

static bool IsBufferFormatLoad(const IR::Inst& inst) {
    return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32;
}

static bool IsBufferFormatStore(const IR::Inst& inst) {
    return inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
}

static IR::Value LoadBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer,
                                  const IR::Value handle, const IR::U32 address,
                                  const IR::BufferInstInfo info) {
    const auto data_fmt = buffer.GetDataFmt();
    const auto num_fmt = buffer.GetNumberFmt();
    const auto num_conv = buffer.GetNumberConversion();
    const auto num_components = AmdGpu::NumComponents(buffer.GetDataFmt());

    IR::Value interpreted;
    switch (data_fmt) {
    case AmdGpu::DataFormat::FormatInvalid:
        interpreted = ir.Imm32(0.f);
        break;
    case AmdGpu::DataFormat::Format8: {
        const auto unpacked = ir.Unpack4x8(num_fmt, ir.LoadBufferU8(handle, address, info));
        interpreted = ir.CompositeExtract(unpacked, 0);
        break;
    }
    case AmdGpu::DataFormat::Format8_8: {
        const auto raw = ir.LoadBufferU16(handle, address, info);
        const auto unpacked = ir.Unpack4x8(num_fmt, raw);
        interpreted = ir.CompositeConstruct(ir.CompositeExtract(unpacked, 0),
                                            ir.CompositeExtract(unpacked, 1));
        break;
    }
    case AmdGpu::DataFormat::Format8_8_8_8:
        interpreted = ir.Unpack4x8(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
        break;
    case AmdGpu::DataFormat::Format16: {
        const auto unpacked = ir.Unpack2x16(num_fmt, ir.LoadBufferU16(handle, address, info));
        interpreted = ir.CompositeExtract(unpacked, 0);
        break;
    }
    case AmdGpu::DataFormat::Format16_16:
        interpreted = ir.Unpack2x16(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
        break;
    case AmdGpu::DataFormat::Format10_11_11:
        interpreted =
            ir.Unpack10_11_11(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
        break;
    case AmdGpu::DataFormat::Format2_10_10_10:
        interpreted =
            ir.Unpack2_10_10_10(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
        break;
    case AmdGpu::DataFormat::Format16_16_16_16: {
        const auto raw = ir.LoadBufferU32(2, handle, address, info);
        interpreted =
            ir.CompositeConstruct(ir.Unpack2x16(num_fmt, IR::U32{ir.CompositeExtract(raw, 0)}),
                                  ir.Unpack2x16(num_fmt, IR::U32{ir.CompositeExtract(raw, 1)}));
        break;
    }
    case AmdGpu::DataFormat::Format32:
    case AmdGpu::DataFormat::Format32_32:
    case AmdGpu::DataFormat::Format32_32_32:
    case AmdGpu::DataFormat::Format32_32_32_32: {
        ASSERT(num_fmt == AmdGpu::NumberFormat::Uint || num_fmt == AmdGpu::NumberFormat::Sint ||
               num_fmt == AmdGpu::NumberFormat::Float);
        interpreted = ir.LoadBufferF32(num_components, handle, address, info);
        break;
    }
    default:
        UNREACHABLE_MSG("Unsupported buffer data format: {}", data_fmt);
    }

    // Pad to 4 components and apply additional modifications.
    boost::container::static_vector<IR::Value, 4> components;
    for (u32 i = 0; i < 4; i++) {
        if (i < num_components) {
            const auto component =
                IR::F32{num_components == 1 ? interpreted : ir.CompositeExtract(interpreted, i)};
            components.push_back(ApplyReadNumberConversion(ir, component, num_conv));
        } else {
            components.push_back(ir.Imm32(0.f));
        }
    }
    const auto swizzled = ApplySwizzle(ir, ir.CompositeConstruct(components), buffer.DstSelect());
    return swizzled;
}

static void StoreBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer,
                              const IR::Value handle, const IR::U32 address, const IR::Value& value,
                              const IR::BufferInstInfo info) {
    const auto data_fmt = buffer.GetDataFmt();
    const auto num_fmt = buffer.GetNumberFmt();
    const auto num_conv = buffer.GetNumberConversion();
    const auto num_components = AmdGpu::NumComponents(buffer.GetDataFmt());

    // Extract actual number of components and apply additional modifications.
    const auto swizzled = ApplySwizzle(ir, value, buffer.DstSelect().Inverse());
    boost::container::static_vector<IR::Value, 4> components;
    for (u32 i = 0; i < num_components; i++) {
        const auto component = IR::F32{ir.CompositeExtract(swizzled, i)};
        components.push_back(ApplyWriteNumberConversion(ir, component, num_conv));
    }
    const auto real_value =
        components.size() == 1 ? components[0] : ir.CompositeConstruct(components);

    switch (data_fmt) {
    case AmdGpu::DataFormat::FormatInvalid:
        break;
    case AmdGpu::DataFormat::Format8: {
        const auto packed =
            ir.Pack4x8(num_fmt, ir.CompositeConstruct(real_value, ir.Imm32(0.f), ir.Imm32(0.f),
                                                      ir.Imm32(0.f)));
        ir.StoreBufferU8(handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format8_8: {
        const auto packed =
            ir.Pack4x8(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 0),
                                                      ir.CompositeExtract(real_value, 1),
                                                      ir.Imm32(0.f), ir.Imm32(0.f)));
        ir.StoreBufferU16(handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format8_8_8_8: {
        auto packed = ir.Pack4x8(num_fmt, real_value);
        ir.StoreBufferU32(1, handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format16: {
        const auto packed = ir.Pack2x16(num_fmt, ir.CompositeConstruct(real_value, ir.Imm32(0.f)));
        ir.StoreBufferU16(handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format16_16: {
        const auto packed = ir.Pack2x16(num_fmt, real_value);
        ir.StoreBufferU32(1, handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format10_11_11: {
        const auto packed = ir.Pack10_11_11(num_fmt, real_value);
        ir.StoreBufferU32(1, handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format2_10_10_10: {
        const auto packed = ir.Pack2_10_10_10(num_fmt, real_value);
        ir.StoreBufferU32(1, handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format16_16_16_16: {
        const auto packed = ir.CompositeConstruct(
            ir.Pack2x16(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 0),
                                                       ir.CompositeExtract(real_value, 1))),
            ir.Pack2x16(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 2),
                                                       ir.CompositeExtract(real_value, 3))));
        ir.StoreBufferU32(2, handle, address, packed, info);
        break;
    }
    case AmdGpu::DataFormat::Format32:
    case AmdGpu::DataFormat::Format32_32:
    case AmdGpu::DataFormat::Format32_32_32:
    case AmdGpu::DataFormat::Format32_32_32_32: {
        ASSERT(num_fmt == AmdGpu::NumberFormat::Uint || num_fmt == AmdGpu::NumberFormat::Sint ||
               num_fmt == AmdGpu::NumberFormat::Float);
        ir.StoreBufferF32(num_components, handle, address, real_value, info);
        break;
    }
    default:
        UNREACHABLE_MSG("Unsupported buffer data format: {}", data_fmt);
    }
}

static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto desc{info.buffers[inst.Arg(0).U32()]};
    const auto buffer{desc.GetSharp(info)};

    if (IsBufferFormatLoad(inst)) {
        const auto interpreted = LoadBufferFormat(ir, buffer, inst.Arg(0), IR::U32{inst.Arg(1)},
                                                  inst.Flags<IR::BufferInstInfo>());
        inst.ReplaceUsesWithAndRemove(interpreted);
    } else if (IsBufferFormatStore(inst)) {
        StoreBufferFormat(ir, buffer, inst.Arg(0), IR::U32{inst.Arg(1)}, inst.Arg(2),
                          inst.Flags<IR::BufferInstInfo>());
        inst.Invalidate();
    }
}

void LowerBufferFormatToRaw(IR::Program& program) {
    auto& info = program.info;
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (IsBufferFormatLoad(inst) || IsBufferFormatStore(inst)) {
                LowerBufferFormatInst(*block, inst, info);
            }
        }
    }
}

} // namespace Shader::Optimization
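The per-component conversions in this pass assume the standard normalized-integer semantics. For example, UnpackSnorm2x16 is assumed to behave like GLSL's unpackSnorm2x16; a sketch:

#include <algorithm>
#include <array>
#include <cstdint>

// Assumed semantics of UnpackSnorm2x16 (matching GLSL unpackSnorm2x16):
// reinterpret each half as a signed 16-bit integer, divide by 32767, and
// clamp so that both -32768 and -32767 map to -1.0.
std::array<float, 2> UnpackSnorm2x16(uint32_t raw) {
    std::array<float, 2> result;
    for (int i = 0; i < 2; ++i) {
        const auto half = static_cast<int16_t>((raw >> (i * 16)) & 0xFFFFu);
        result[i] = std::clamp(static_cast<float>(half) / 32767.0f, -1.0f, 1.0f);
    }
    return result;
}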
@@ -1,8 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <algorithm>
-#include <boost/container/small_vector.hpp>
 #include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/breadth_first_search.h"
@@ -37,10 +35,17 @@ bool IsBufferAtomic(const IR::Inst& inst) {
 
 bool IsBufferStore(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
+    case IR::Opcode::StoreBufferU8:
+    case IR::Opcode::StoreBufferU16:
     case IR::Opcode::StoreBufferU32:
     case IR::Opcode::StoreBufferU32x2:
     case IR::Opcode::StoreBufferU32x3:
     case IR::Opcode::StoreBufferU32x4:
+    case IR::Opcode::StoreBufferF32:
+    case IR::Opcode::StoreBufferF32x2:
+    case IR::Opcode::StoreBufferF32x3:
+    case IR::Opcode::StoreBufferF32x4:
     case IR::Opcode::StoreBufferFormatF32:
         return true;
     default:
         return IsBufferAtomic(inst);
@@ -49,10 +54,17 @@ bool IsBufferStore(const IR::Inst& inst) {
 
 bool IsBufferInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadBufferU8:
+    case IR::Opcode::LoadBufferU16:
     case IR::Opcode::LoadBufferU32:
     case IR::Opcode::LoadBufferU32x2:
     case IR::Opcode::LoadBufferU32x3:
     case IR::Opcode::LoadBufferU32x4:
+    case IR::Opcode::LoadBufferF32:
+    case IR::Opcode::LoadBufferF32x2:
+    case IR::Opcode::LoadBufferF32x3:
+    case IR::Opcode::LoadBufferF32x4:
+    case IR::Opcode::LoadBufferFormatF32:
     case IR::Opcode::ReadConstBuffer:
         return true;
     default:
@@ -65,34 +77,6 @@ bool IsDataRingInstruction(const IR::Inst& inst) {
            inst.GetOpcode() == IR::Opcode::DataConsume;
 }
 
-bool IsTextureBufferInstruction(const IR::Inst& inst) {
-    return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
-           inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
-}
-
-bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
-    switch (num_format) {
-    case AmdGpu::NumberFormat::Float:
-        switch (data_format) {
-        case AmdGpu::DataFormat::Format16:
-        case AmdGpu::DataFormat::Format16_16:
-        case AmdGpu::DataFormat::Format16_16_16_16:
-            return true;
-        default:
-            return false;
-        }
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uint:
-    case AmdGpu::NumberFormat::Sint:
-    case AmdGpu::NumberFormat::SnormNz:
-    default:
-        return false;
-    }
-}
-
 IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     return IR::Type::U32;
 }
@@ -132,8 +116,7 @@ bool IsImageInstruction(const IR::Inst& inst) {
 class Descriptors {
 public:
     explicit Descriptors(Info& info_)
-        : info{info_}, buffer_resources{info_.buffers},
-          texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images},
+        : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
           sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}
 
     u32 Add(const BufferResource& desc) {
@@ -147,15 +130,7 @@ public:
         auto& buffer = buffer_resources[index];
         buffer.used_types |= desc.used_types;
         buffer.is_written |= desc.is_written;
-        return index;
-    }
-
-    u32 Add(const TextureBufferResource& desc) {
-        const u32 index{Add(texture_buffer_resources, desc, [&desc](const auto& existing) {
-            return desc.sharp_idx == existing.sharp_idx;
-        })};
-        auto& buffer = texture_buffer_resources[index];
-        buffer.is_written |= desc.is_written;
+        buffer.is_formatted |= desc.is_formatted;
         return index;
     }
 
@@ -196,7 +171,6 @@ private:
 
     const Info& info;
     BufferResourceList& buffer_resources;
-    TextureBufferResourceList& texture_buffer_resources;
     ImageResourceList& image_resources;
     SamplerResourceList& sampler_resources;
     FMaskResourceList& fmask_resources;
@@ -313,6 +287,8 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
         .sharp_idx = sharp,
         .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
         .is_written = IsBufferStore(inst),
+        .is_formatted = inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
+                        inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
     });
 }
 
@@ -321,21 +297,6 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     inst.SetArg(0, ir.Imm32(binding));
 }
 
-void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
-                             Descriptors& descriptors) {
-    const IR::Inst* handle = inst.Arg(0).InstRecursive();
-    const IR::Inst* producer = handle->Arg(0).InstRecursive();
-    const auto sharp = TrackSharp(producer, info);
-    const s32 binding = descriptors.Add(TextureBufferResource{
-        .sharp_idx = sharp,
-        .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
-    });
-
-    // Replace handle with binding index in texture buffer resource list.
-    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
-    inst.SetArg(0, ir.Imm32(binding));
-}
-
 void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
     const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
         const auto opcode = inst->GetOpcode();
@@ -553,36 +514,6 @@ void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
     inst.SetArg(1, CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
 }
 
-void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
-    const auto handle = inst.Arg(0);
-    const auto buffer_res = info.texture_buffers[handle.U32()];
-    const auto buffer = buffer_res.GetSharp(info);
-
-    // Only linear addressing with index is supported currently, since we cannot yet
-    // address with sub-texel granularity.
-    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
-    ASSERT_MSG(!buffer.swizzle_enable && !inst_info.offset_enable && inst_info.inst_offset == 0,
-               "Unsupported texture buffer address mode.");
-
-    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
-    // Stride of 1 to get an index into formatted data. See above addressing limitations.
-    inst.SetArg(1, CalculateBufferAddress(ir, inst, info, buffer, 1U));
-
-    if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
-        const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect().Inverse());
-        const auto converted =
-            ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
-        inst.SetArg(2, converted);
-    } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
-        const auto inst_info = inst.Flags<IR::BufferInstInfo>();
-        const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
-        const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
-        const auto converted =
-            ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
-        inst.ReplaceUsesWith(converted);
-    }
-}
-
 IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR::Value& x,
                         const IR::Value& y, const IR::Value& face) {
     if (!image.IsCube()) {
@@ -861,8 +792,6 @@ void ResourceTrackingPass(IR::Program& program) {
         for (IR::Inst& inst : block->Instructions()) {
             if (IsBufferInstruction(inst)) {
                 PatchBufferSharp(*block, inst, info, descriptors);
-            } else if (IsTextureBufferInstruction(inst)) {
-                PatchTextureBufferSharp(*block, inst, info, descriptors);
             } else if (IsImageInstruction(inst)) {
                 PatchImageSharp(*block, inst, info, descriptors);
             } else if (IsDataRingInstruction(inst)) {
@@ -876,8 +805,6 @@ void ResourceTrackingPass(IR::Program& program) {
         for (IR::Inst& inst : block->Instructions()) {
             if (IsBufferInstruction(inst)) {
                 PatchBufferArgs(*block, inst, info);
-            } else if (IsTextureBufferInstruction(inst)) {
-                PatchTextureBufferArgs(*block, inst, info);
             } else if (IsImageInstruction(inst)) {
                 PatchImageArgs(*block, inst, info);
             }
@@ -50,12 +50,6 @@ void Visit(Info& info, const IR::Inst& inst) {
     case IR::Opcode::ImageWrite:
         info.has_storage_images = true;
        break;
-    case IR::Opcode::LoadBufferFormatF32:
-        info.has_texel_buffers = true;
-        break;
-    case IR::Opcode::StoreBufferFormatF32:
-        info.has_image_buffers = true;
-        break;
     case IR::Opcode::QuadShuffle:
         info.uses_group_quad = true;
         break;
@@ -82,6 +76,12 @@ void Visit(Info& info, const IR::Inst& inst) {
     case IR::Opcode::ReadConst:
         info.has_readconst = true;
         break;
+    case IR::Opcode::PackUfloat10_11_11:
+        info.uses_pack_10_11_11 = true;
+        break;
+    case IR::Opcode::UnpackUfloat10_11_11:
+        info.uses_unpack_10_11_11 = true;
+        break;
     default:
         break;
     }