mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-26 12:26:18 +00:00
shader_recompiler: Replace texel buffers with in-shader buffer format interpretation (#2363)
* shader_recompiler: Replace texel buffers with in-shader buffer format interpretation
* shader_recompiler: Move 10/11-bit float conversion to functions and address some comments.
* vulkan: Remove VK_KHR_maintenance5 as it is no longer needed for buffer views.
* shader_recompiler: Add helpers for composites and bitfields in pack/unpack.
* shader_recompiler: Use initializer_list for bitfield insert helper.
This commit is contained in:
parent
78b4f10cc6
commit
cfe249debe
35 changed files with 1037 additions and 562 deletions
|
@ -30,28 +30,25 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
|
|||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
||||
|
||||
IR::Value unpacked_value;
|
||||
bool is_integer = false;
|
||||
AmdGpu::NumberFormat num_format;
|
||||
switch (color_buffer.export_format) {
|
||||
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
|
||||
// No export
|
||||
return;
|
||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
|
||||
unpacked_value = ir.UnpackHalf2x16(value);
|
||||
num_format = AmdGpu::NumberFormat::Float;
|
||||
break;
|
||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
|
||||
unpacked_value = ir.UnpackUnorm2x16(value);
|
||||
num_format = AmdGpu::NumberFormat::Unorm;
|
||||
break;
|
||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
|
||||
unpacked_value = ir.UnpackSnorm2x16(value);
|
||||
num_format = AmdGpu::NumberFormat::Snorm;
|
||||
break;
|
||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
|
||||
unpacked_value = ir.UnpackUint2x16(value);
|
||||
is_integer = true;
|
||||
num_format = AmdGpu::NumberFormat::Uint;
|
||||
break;
|
||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
|
||||
unpacked_value = ir.UnpackSint2x16(value);
|
||||
is_integer = true;
|
||||
num_format = AmdGpu::NumberFormat::Sint;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
|
||||
|
@ -59,16 +56,15 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
|
|||
break;
|
||||
}
|
||||
|
||||
const auto r = ir.CompositeExtract(unpacked_value, 0);
|
||||
const auto g = ir.CompositeExtract(unpacked_value, 1);
|
||||
const IR::F32 float_r = is_integer ? ir.BitCast<IR::F32>(IR::U32{r}) : IR::F32{r};
|
||||
const IR::F32 float_g = is_integer ? ir.BitCast<IR::F32>(IR::U32{g}) : IR::F32{g};
|
||||
const auto unpacked_value = ir.Unpack2x16(num_format, value);
|
||||
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
|
||||
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
|
||||
|
||||
const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
|
||||
const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
|
||||
|
||||
ExportMrtValue(attribute, swizzled_r, float_r, color_buffer);
|
||||
ExportMrtValue(attribute, swizzled_g, float_g, color_buffer);
|
||||
ExportMrtValue(attribute, swizzled_r, r, color_buffer);
|
||||
ExportMrtValue(attribute, swizzled_g, g, color_buffer);
|
||||
}
|
||||
|
||||
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
|
||||
|
@ -115,7 +111,7 @@ void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U3
|
|||
ExportMrtCompressed(attribute, idx, value);
|
||||
return;
|
||||
}
|
||||
const IR::Value unpacked_value = ir.UnpackHalf2x16(value);
|
||||
const IR::Value unpacked_value = ir.Unpack2x16(AmdGpu::NumberFormat::Float, value);
|
||||
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
|
||||
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
|
||||
ir.SetAttribute(attribute, r, idx * 2);
|
||||
|
|
|
@ -651,19 +651,19 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
|
|||
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
|
||||
const IR::Value vec_f32 =
|
||||
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
|
||||
SetDst(inst.dst[0], ir.PackUnorm2x16(vec_f32));
|
||||
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Unorm, vec_f32));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PKNORM_I16_F32(const GcnInst& inst) {
|
||||
const IR::Value vec_f32 =
|
||||
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
|
||||
SetDst(inst.dst[0], ir.PackSnorm2x16(vec_f32));
|
||||
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Snorm, vec_f32));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
|
||||
const IR::Value vec_f32 =
|
||||
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
|
||||
SetDst(inst.dst[0], ir.PackHalf2x16(vec_f32));
|
||||
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Float, vec_f32));
|
||||
}
|
||||
|
||||
// VOP1
|
||||
|
@ -1245,14 +1245,16 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
|
|||
|
||||
void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
|
||||
const IR::Value vec_u32 =
|
||||
ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
|
||||
SetDst(inst.dst[0], ir.PackUint2x16(vec_u32));
|
||||
ir.CompositeConstruct(ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[0])),
|
||||
ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[1])));
|
||||
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Uint, vec_u32));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
|
||||
const IR::Value vec_u32 =
|
||||
ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
|
||||
SetDst(inst.dst[0], ir.PackSint2x16(vec_u32));
|
||||
ir.CompositeConstruct(ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[0])),
|
||||
ir.BitCast<IR::F32>(GetSrc<IR::U32>(inst.src[1])));
|
||||
SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Sint, vec_u32));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
|
||||
|
|
|
@ -208,7 +208,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info);
|
||||
const IR::Value value = ir.LoadBufferU32(num_dwords, handle, address, buffer_info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::U32{value});
|
||||
|
@ -314,16 +314,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, buffer_info);
|
||||
ir.StoreBufferU32(num_dwords, handle, address, value, buffer_info);
|
||||
}
|
||||
|
||||
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
|
||||
const auto& mubuf = inst.control.mubuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
if (mubuf.idxen) {
|
||||
if (mubuf.idxen && mubuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
if (mubuf.idxen || mubuf.offen) {
|
||||
return ir.GetVectorReg(vaddr);
|
||||
}
|
||||
return {};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue