shader_recompiler: Replace texel buffers with in-shader buffer format interpretation (#2363)

* shader_recompiler: Replace texel buffers with in-shader buffer format interpretation

* shader_recompiler: Move 10/11-bit float conversion to functions and address some comments.

* vulkan: Remove VK_KHR_maintenance5 as it is no longer needed for buffer views.

* shader_recompiler: Add helpers for composites and bitfields in pack/unpack.

* shader_recompiler: Use initializer_list for bitfield insert helper.
This commit is contained in:
squidbus 2025-02-06 20:40:49 -08:00 committed by GitHub
parent 78b4f10cc6
commit cfe249debe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 1037 additions and 562 deletions

View file

@ -30,28 +30,25 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
IR::Value unpacked_value;
bool is_integer = false;
AmdGpu::NumberFormat num_format;
switch (color_buffer.export_format) {
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
// No export
return;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
unpacked_value = ir.UnpackHalf2x16(value);
num_format = AmdGpu::NumberFormat::Float;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
unpacked_value = ir.UnpackUnorm2x16(value);
num_format = AmdGpu::NumberFormat::Unorm;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
unpacked_value = ir.UnpackSnorm2x16(value);
num_format = AmdGpu::NumberFormat::Snorm;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
unpacked_value = ir.UnpackUint2x16(value);
is_integer = true;
num_format = AmdGpu::NumberFormat::Uint;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
unpacked_value = ir.UnpackSint2x16(value);
is_integer = true;
num_format = AmdGpu::NumberFormat::Sint;
break;
default:
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
@ -59,16 +56,15 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
break;
}
const auto r = ir.CompositeExtract(unpacked_value, 0);
const auto g = ir.CompositeExtract(unpacked_value, 1);
const IR::F32 float_r = is_integer ? ir.BitCast<IR::F32>(IR::U32{r}) : IR::F32{r};
const IR::F32 float_g = is_integer ? ir.BitCast<IR::F32>(IR::U32{g}) : IR::F32{g};
const auto unpacked_value = ir.Unpack2x16(num_format, value);
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
ExportMrtValue(attribute, swizzled_r, float_r, color_buffer);
ExportMrtValue(attribute, swizzled_g, float_g, color_buffer);
ExportMrtValue(attribute, swizzled_r, r, color_buffer);
ExportMrtValue(attribute, swizzled_g, g, color_buffer);
}
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
@ -115,7 +111,7 @@ void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U3
ExportMrtCompressed(attribute, idx, value);
return;
}
const IR::Value unpacked_value = ir.UnpackHalf2x16(value);
const IR::Value unpacked_value = ir.Unpack2x16(AmdGpu::NumberFormat::Float, value);
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
ir.SetAttribute(attribute, r, idx * 2);

View file

@ -651,19 +651,19 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
SetDst(inst.dst[0], ir.PackUnorm2x16(vec_f32));
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Unorm, vec_f32));
}
void Translator::V_CVT_PKNORM_I16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
SetDst(inst.dst[0], ir.PackSnorm2x16(vec_f32));
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Snorm, vec_f32));
}
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
SetDst(inst.dst[0], ir.PackHalf2x16(vec_f32));
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Float, vec_f32));
}
// VOP1
@ -1245,14 +1245,16 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
const IR::Value vec_u32 =
ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
SetDst(inst.dst[0], ir.PackUint2x16(vec_u32));
ir.CompositeConstruct(ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[0])),
ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[1])));
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Uint, vec_u32));
}
void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
const IR::Value vec_u32 =
ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
SetDst(inst.dst[0], ir.PackSint2x16(vec_u32));
ir.CompositeConstruct(ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[0])),
ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[1])));
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Sint, vec_u32));
}
void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {

View file

@ -208,7 +208,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info);
const IR::Value value = ir.LoadBufferU32(num_dwords, handle, address, buffer_info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::U32{value});
@ -314,16 +314,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, buffer_info);
ir.StoreBufferU32(num_dwords, handle, address, value, buffer_info);
}
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
const auto& mubuf = inst.control.mubuf;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
const IR::Value address = [&] -> IR::Value {
if (mubuf.idxen) {
if (mubuf.idxen && mubuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
return {};