diff --git a/externals/sirit b/externals/sirit
index 26ad5a9d0..d6f3c0d99 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32
+Subproject commit d6f3c0d99862ab2ff8f95e9ac221560f1f97e29a
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 02ac74e19..539c6cb81 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -58,4 +58,48 @@ Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
     return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
 }
 
+Id EmitPackUnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpPackUnorm2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpUnpackUnorm2x16(ctx.F32[2], value);
+}
+
+Id EmitPackSnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpPackSnorm2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpUnpackSnorm2x16(ctx.F32[2], value);
+}
+
+Id EmitPackUint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
+    const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
+    return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
+}
+
+Id EmitUnpackUint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
+    const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
+    return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
+}
+
+Id EmitPackSint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
+    const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
+    return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
+}
+
+Id EmitUnpackSint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
+    const auto y{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
+    return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 4833dc9d0..842b13207 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -197,6 +197,14 @@ Id EmitPackFloat2x16(EmitContext& ctx, Id value);
 Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
 Id EmitPackHalf2x16(EmitContext& ctx, Id value);
 Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackUnorm2x16(EmitContext& ctx, Id value);
+Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value);
+Id EmitPackSnorm2x16(EmitContext& ctx, Id value);
+Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value);
+Id EmitPackUint2x16(EmitContext& ctx, Id value);
+Id EmitUnpackUint2x16(EmitContext& ctx, Id value);
+Id EmitPackSint2x16(EmitContext& ctx, Id value);
+Id EmitUnpackSint2x16(EmitContext& ctx, Id value);
 Id EmitFPAbs16(EmitContext& ctx, Id value);
 Id EmitFPAbs32(EmitContext& ctx, Id value);
 Id EmitFPAbs64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp
index 38ff9ae14..84c2ee658 100644
--- a/src/shader_recompiler/frontend/translate/export.cpp
+++ b/src/shader_recompiler/frontend/translate/export.cpp
@@ -7,6 +7,125 @@
 
 namespace Shader::Gcn {
 
+u32 SwizzleMrtComponent(const FragmentRuntimeInfo::PsColorBuffer& color_buffer, u32 comp) {
+    const auto [r, g, b, a] = color_buffer.swizzle;
+    const std::array swizzle_array = {r, g, b, a};
+    const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);
+    constexpr auto min_comp_type = static_cast<u32>(AmdGpu::CompSwizzle::Red);
+    return swizzled_comp_type >= min_comp_type ? swizzled_comp_type - min_comp_type : comp;
+}
+
+void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
+                                const FragmentRuntimeInfo::PsColorBuffer& color_buffer) {
+    const auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
+    ir.SetAttribute(attribute, converted, comp);
+}
+
+void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
+    const u32 color_buffer_idx =
+        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
+    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
+
+    IR::Value unpacked_value;
+    bool is_integer = false;
+    switch (color_buffer.export_format) {
+    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
+        // No export
+        return;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
+        unpacked_value = ir.UnpackHalf2x16(value);
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
+        unpacked_value = ir.UnpackUnorm2x16(value);
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
+        unpacked_value = ir.UnpackSnorm2x16(value);
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
+        unpacked_value = ir.UnpackUint2x16(value);
+        is_integer = true;
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
+        unpacked_value = ir.UnpackSint2x16(value);
+        is_integer = true;
+        break;
+    default:
+        UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
+                        static_cast<u32>(color_buffer.export_format));
+        break;
+    }
+
+    const auto r = ir.CompositeExtract(unpacked_value, 0);
+    const auto g = ir.CompositeExtract(unpacked_value, 1);
+    const IR::F32 float_r = is_integer ? ir.BitCast<IR::F32>(IR::U32{r}) : IR::F32{r};
+    const IR::F32 float_g = is_integer ? ir.BitCast<IR::F32>(IR::U32{g}) : IR::F32{g};
+
+    const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
+    const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
+
+    ExportMrtValue(attribute, swizzled_r, float_r, color_buffer);
+    ExportMrtValue(attribute, swizzled_g, float_g, color_buffer);
+}
+
+void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
+    const u32 color_buffer_idx =
+        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
+    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
+    const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
+
+    switch (color_buffer.export_format) {
+    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
+        // No export
+        return;
+    case AmdGpu::Liverpool::ShaderExportFormat::R_32:
+        // Red only
+        if (swizzled_comp != 0) {
+            return;
+        }
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
+        // Red and Green only
+        if (swizzled_comp != 0 && swizzled_comp != 1) {
+            return;
+        }
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
+        // Red and Alpha only
+        if (swizzled_comp != 0 && swizzled_comp != 3) {
+            return;
+        }
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
+        // All components
+        break;
+    default:
+        UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
+                        static_cast<u32>(color_buffer.export_format));
+        break;
+    }
+    ExportMrtValue(attribute, swizzled_comp, value, color_buffer);
+}
+
+void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
+    if (IsMrt(attribute)) {
+        ExportMrtCompressed(attribute, idx, value);
+        return;
+    }
+    const IR::Value unpacked_value = ir.UnpackHalf2x16(value);
+    const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+    const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+    ir.SetAttribute(attribute, r, idx * 2);
+    ir.SetAttribute(attribute, g, idx * 2 + 1);
+}
+
+void Translator::ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
+    if (IsMrt(attribute)) {
+        ExportMrtUncompressed(attribute, comp, value);
+        return;
+    }
+    ir.SetAttribute(attribute, value, comp);
+}
+
 void Translator::EmitExport(const GcnInst& inst) {
     if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
         ir.Discard(ir.LogicalNot(ir.GetExec()));
@@ -26,41 +145,15 @@ void Translator::EmitExport(const GcnInst& inst) {
         IR::VectorReg(inst.src[3].code),
     };
 
-    const auto set_attribute = [&](u32 comp, IR::F32 value) {
-        if (!IR::IsMrt(attrib)) {
-            ir.SetAttribute(attrib, value, comp);
-            return;
-        }
-        const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
-        const auto col_buf = runtime_info.fs_info.color_buffers[index];
-        const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
-        const auto [r, g, b, a] = col_buf.swizzle;
-        const std::array swizzle_array = {r, g, b, a};
-        const auto swizzled_comp = swizzle_array[comp];
-        if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
-            ir.SetAttribute(attrib, converted, comp);
-            return;
-        }
-        ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
-    };
-
-    const auto unpack = [&](u32 idx) {
-        const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx]));
-        const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)};
-        const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)};
-        set_attribute(idx * 2, r);
-        set_attribute(idx * 2 + 1, g);
-    };
-
     // Components are float16 packed into a VGPR
     if (exp.compr) {
         // Export R, G
         if (exp.en & 1) {
-            unpack(0);
+            ExportCompressed(attrib, 0, ir.GetVectorReg(vsrc[0]));
         }
         // Export B, A
         if ((exp.en >> 2) & 1) {
-            unpack(1);
+            ExportCompressed(attrib, 1, ir.GetVectorReg(vsrc[1]));
         }
     } else {
         // Components are float32 into separate VGPRS
@@ -69,8 +162,7 @@ void Translator::EmitExport(const GcnInst& inst) {
             if ((mask & 1) == 0) {
                 continue;
             }
-            const IR::F32 comp = ir.GetVectorReg<IR::F32>(vsrc[i]);
-            set_attribute(i, comp);
+            ExportUncompressed(attrib, i, ir.GetVectorReg<IR::F32>(vsrc[i]));
         }
     }
     if (IR::IsMrt(attrib)) {
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 496455b50..287885854 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -170,6 +170,7 @@ public:
     void V_SUBBREV_U32(const GcnInst& inst);
     void V_LDEXP_F32(const GcnInst& inst);
     void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
+    void V_CVT_PKNORM_I16_F32(const GcnInst& inst);
     void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
 
     // VOP1
@@ -244,6 +245,7 @@ public:
     void V_SAD(const GcnInst& inst);
     void V_SAD_U32(const GcnInst& inst);
     void V_CVT_PK_U16_U32(const GcnInst& inst);
+    void V_CVT_PK_I16_I32(const GcnInst& inst);
     void V_CVT_PK_U8_F32(const GcnInst& inst);
     void V_LSHL_B64(const GcnInst& inst);
     void V_MUL_F64(const GcnInst& inst);
@@ -306,6 +308,13 @@ private:
     IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                              const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
 
+    void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
+                        const FragmentRuntimeInfo::PsColorBuffer& color_buffer);
+    void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
+    void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
+    void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
+    void ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
+
     void LogMissingOpcode(const GcnInst& inst);
 
 private:
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 42dbcc513..f73618dbe 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -96,6 +96,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_LDEXP_F32(inst);
     case Opcode::V_CVT_PKNORM_U16_F32:
         return V_CVT_PKNORM_U16_F32(inst);
+    case Opcode::V_CVT_PKNORM_I16_F32:
+        return V_CVT_PKNORM_I16_F32(inst);
     case Opcode::V_CVT_PKRTZ_F16_F32:
         return V_CVT_PKRTZ_F16_F32(inst);
 
@@ -376,6 +378,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_SAD_U32(inst);
     case Opcode::V_CVT_PK_U16_U32:
         return V_CVT_PK_U16_U32(inst);
+    case Opcode::V_CVT_PK_I16_I32:
+        return V_CVT_PK_I16_I32(inst);
     case Opcode::V_CVT_PK_U8_F32:
         return V_CVT_PK_U8_F32(inst);
     case Opcode::V_LSHL_B64:
@@ -645,12 +649,15 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
 }
 
 void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
-    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
-    const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f)));
-    const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f)));
-    const IR::VectorReg dst_reg{inst.dst[0].code};
-    ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16)));
+    const IR::Value vec_f32 =
+        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackUnorm2x16(vec_f32));
+}
+
+void Translator::V_CVT_PKNORM_I16_F32(const GcnInst& inst) {
+    const IR::Value vec_f32 =
+        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackSnorm2x16(vec_f32));
 }
 
 void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
@@ -1237,11 +1244,15 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
 }
 
 void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
-    const IR::U32 src0{GetSrc(inst.src[0])};
-    const IR::U32 src1{GetSrc(inst.src[1])};
-    const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false);
-    const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false);
-    SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
+    const IR::Value vec_u32 =
+        ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackUint2x16(vec_u32));
+}
+
+void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
+    const IR::Value vec_u32 =
+        ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackSint2x16(vec_u32));
 }
 
 void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index f0558665b..ecbe1f838 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -795,6 +795,38 @@ Value IREmitter::UnpackHalf2x16(const U32& value) {
     return Inst(Opcode::UnpackHalf2x16, value);
 }
 
+U32 IREmitter::PackUnorm2x16(const Value& vector) {
+    return Inst<U32>(Opcode::PackUnorm2x16, vector);
+}
+
+Value IREmitter::UnpackUnorm2x16(const U32& value) {
+    return Inst(Opcode::UnpackUnorm2x16, value);
+}
+
+U32 IREmitter::PackSnorm2x16(const Value& vector) {
+    return Inst<U32>(Opcode::PackSnorm2x16, vector);
+}
+
+Value IREmitter::UnpackSnorm2x16(const U32& value) {
+    return Inst(Opcode::UnpackSnorm2x16, value);
+}
+
+U32 IREmitter::PackUint2x16(const Value& value) {
+    return Inst<U32>(Opcode::PackUint2x16, value);
+}
+
+Value IREmitter::UnpackUint2x16(const U32& value) {
+    return Inst(Opcode::UnpackUint2x16, value);
+}
+
+U32 IREmitter::PackSint2x16(const Value& value) {
+    return Inst<U32>(Opcode::PackSint2x16, value);
+}
+
+Value IREmitter::UnpackSint2x16(const U32& value) {
+    return Inst(Opcode::UnpackSint2x16, value);
+}
+
 F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
     if (a.Type() != b.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 5dd49ce7a..97b94187a 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -175,6 +175,14 @@ public:
     [[nodiscard]] U32 PackHalf2x16(const Value& vector);
     [[nodiscard]] Value UnpackHalf2x16(const U32& value);
+    [[nodiscard]] U32 PackUnorm2x16(const Value& vector);
+    [[nodiscard]] Value UnpackUnorm2x16(const U32& value);
+    [[nodiscard]] U32 PackSnorm2x16(const Value& vector);
+    [[nodiscard]] Value UnpackSnorm2x16(const U32& value);
+    [[nodiscard]] U32 PackUint2x16(const Value& value);
+    [[nodiscard]] Value UnpackUint2x16(const U32& value);
+    [[nodiscard]] U32 PackSint2x16(const Value& value);
+    [[nodiscard]] Value UnpackSint2x16(const U32& value);
 
     [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 63a4e1e62..6750be5a6 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -187,6 +187,14 @@ OPCODE(PackFloat2x16, U32, F16x
 OPCODE(UnpackFloat2x16, F16x2, U32, )
 OPCODE(PackHalf2x16, U32, F32x2, )
 OPCODE(UnpackHalf2x16, F32x2, U32, )
+OPCODE(PackUnorm2x16, U32, F32x2, )
+OPCODE(UnpackUnorm2x16, F32x2, U32, )
+OPCODE(PackSnorm2x16, U32, F32x2, )
+OPCODE(UnpackSnorm2x16, F32x2, U32, )
+OPCODE(PackUint2x16, U32, U32x2, )
+OPCODE(UnpackUint2x16, U32x2, U32, )
+OPCODE(PackSint2x16, U32, U32x2, )
+OPCODE(UnpackSint2x16, U32x2, U32, )
 
 // Floating-point operations
 OPCODE(FPAbs32, F32, F32, )
diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
index 12a1b56e9..c72b9e835 100644
--- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
@@ -348,6 +348,22 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
     case IR::Opcode::UnpackFloat2x16:
         return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
+    case IR::Opcode::PackUnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm2x16);
+    case IR::Opcode::UnpackUnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackUnorm2x16);
+    case IR::Opcode::PackSnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSnorm2x16);
+    case IR::Opcode::UnpackSnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackSnorm2x16);
+    case IR::Opcode::PackUint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUint2x16);
+    case IR::Opcode::UnpackUint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackUint2x16);
+    case IR::Opcode::PackSint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSint2x16);
+    case IR::Opcode::UnpackSint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackSint2x16);
     case IR::Opcode::SelectU1:
     case IR::Opcode::SelectU8:
     case IR::Opcode::SelectU16:
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 2bf5e3f0a..103c1faa8 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -184,6 +184,7 @@ struct FragmentRuntimeInfo {
         AmdGpu::NumberFormat num_format;
         AmdGpu::NumberConversion num_conversion;
        AmdGpu::CompMapping swizzle;
+        AmdGpu::Liverpool::ShaderExportFormat export_format;
 
         auto operator<=>(const PsColorBuffer&) const noexcept = default;
     };
diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h
index ec9c92ef1..67821b0f2 100644
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@@ -266,6 +266,10 @@ struct Liverpool {
        BitField<20, 4, ShaderExportFormat> col5;
        BitField<24, 4, ShaderExportFormat> col6;
        BitField<28, 4, ShaderExportFormat> col7;
+
+        [[nodiscard]] ShaderExportFormat GetFormat(const u32 buf_idx) const {
+            return static_cast<ShaderExportFormat>((raw >> (buf_idx * 4)) & 0xfu);
+        }
     };
 
     union VsOutputControl {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 9a20f94c1..8c5cb1f3b 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -35,9 +35,8 @@ struct GraphicsPipelineKey {
     std::array<size_t, MaxShaderStages> stage_hashes;
     u32 num_color_attachments;
     std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
-    std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
-    std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
-    std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
+    std::array<Shader::FragmentRuntimeInfo::PsColorBuffer, Liverpool::NumColorBuffers>
+        color_buffers;
 
     vk::Format depth_format;
     vk::Format stencil_format;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index c6a56745d..3728a55fb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,11 +167,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
             };
         }
         for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
-            info.fs_info.color_buffers[i] = {
-                .num_format = graphics_key.color_num_formats[i],
-                .num_conversion = graphics_key.color_num_conversions[i],
-                .swizzle = graphics_key.color_swizzles[i],
-            };
+            info.fs_info.color_buffers[i] = graphics_key.color_buffers[i];
         }
         break;
     }
@@ -309,11 +305,9 @@ bool PipelineCache::RefreshGraphicsKey() {
     // order. We need to do some arrays compaction at this stage
     key.num_color_attachments = 0;
     key.color_formats.fill(vk::Format::eUndefined);
-    key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
-    key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
+    key.color_buffers.fill({});
     key.blend_controls.fill({});
     key.write_masks.fill({});
-    key.color_swizzles.fill({});
     key.vertex_buffer_formats.fill(vk::Format::eUndefined);
 
     key.patch_control_points = 0;
@@ -338,9 +332,12 @@ bool PipelineCache::RefreshGraphicsKey() {
 
         key.color_formats[remapped_cb] =
             LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
-        key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
-        key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
-        key.color_swizzles[remapped_cb] = col_buf.Swizzle();
+        key.color_buffers[remapped_cb] = {
+            .num_format = col_buf.GetNumberFmt(),
+            .num_conversion = col_buf.GetNumberConversion(),
+            .swizzle = col_buf.Swizzle(),
+            .export_format = regs.color_export_format.GetFormat(cb),
+        };
     }
 
     fetch_shader = std::nullopt;
@@ -456,7 +453,7 @@ bool PipelineCache::RefreshGraphicsKey() {
             // of the latter we need to change format to undefined, and either way we need to
            // increment the index for the null attachment binding.
             key.color_formats[remapped_cb] = vk::Format::eUndefined;
-            key.color_swizzles[remapped_cb] = {};
+            key.color_buffers[remapped_cb] = {};
             ++remapped_cb;
             continue;
         }