spirv: Add lower fp16 to fp32 pass

This commit is contained in:
ReinUsesLisp 2021-02-19 18:10:18 -03:00 committed by ameerj
parent 85cce78583
commit 6db69990da
32 changed files with 479 additions and 285 deletions

View file

@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::LocalInvocationId:
info.uses_local_invocation_id = true;
break;
case IR::Opcode::CompositeConstructF16x2:
case IR::Opcode::CompositeConstructF16x3:
case IR::Opcode::CompositeConstructF16x4:
case IR::Opcode::CompositeExtractF16x2:
case IR::Opcode::CompositeExtractF16x3:
case IR::Opcode::CompositeExtractF16x4:
case IR::Opcode::BitCastU16F16:
case IR::Opcode::BitCastF16U16:
case IR::Opcode::PackFloat2x16:
case IR::Opcode::UnpackFloat2x16:
case IR::Opcode::ConvertS16F16:
case IR::Opcode::ConvertS32F16:
case IR::Opcode::ConvertS64F16:
case IR::Opcode::ConvertU16F16:
case IR::Opcode::ConvertU32F16:
case IR::Opcode::ConvertU64F16:
case IR::Opcode::FPAbs16:
case IR::Opcode::FPAdd16:
case IR::Opcode::FPCeil16:
@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::FPRoundEven16:
case IR::Opcode::FPSaturate16:
case IR::Opcode::FPTrunc16:
info.uses_fp16;
info.uses_fp16 = true;
break;
case IR::Opcode::FPAbs64:
case IR::Opcode::FPAdd64:

View file

@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) {
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
/*
* We are looking for this pattern:
* %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1)
* %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1)
* %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1)
* %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1)
* %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1)
* %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10)
* %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
* %rhs_mul = IMul32 %rhs_bfe, %factor_b
* %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
* %lhs_mul = IMul32 %lhs_bfe, %factor_b
* %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
* %result = IAdd32 %lhs_shl, %rhs_mul
*
* And replacing it with
* %result = IMul32 %factor_a, %factor_b

View file

@ -0,0 +1,79 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/microinstruction.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
/// Maps a 16-bit floating-point opcode to the opcode this pass substitutes for it.
///
/// The FP arithmetic, composite construct/extract and F16 conversion opcodes are
/// mapped to their F32-named counterparts, and PackFloat2x16/UnpackFloat2x16 are
/// mapped to PackHalf2x16/UnpackHalf2x16.
///
/// @param op Opcode to translate.
/// @return The substitute opcode, or @p op itself when it is not listed in the table.
IR::Opcode Replace(IR::Opcode op) {
    struct Lowering {
        IR::Opcode from;
        IR::Opcode to;
    };
    static constexpr Lowering lowerings[]{
        // Floating-point arithmetic
        {IR::Opcode::FPAbs16, IR::Opcode::FPAbs32},
        {IR::Opcode::FPAdd16, IR::Opcode::FPAdd32},
        {IR::Opcode::FPCeil16, IR::Opcode::FPCeil32},
        {IR::Opcode::FPFloor16, IR::Opcode::FPFloor32},
        {IR::Opcode::FPFma16, IR::Opcode::FPFma32},
        {IR::Opcode::FPMul16, IR::Opcode::FPMul32},
        {IR::Opcode::FPNeg16, IR::Opcode::FPNeg32},
        {IR::Opcode::FPRoundEven16, IR::Opcode::FPRoundEven32},
        {IR::Opcode::FPSaturate16, IR::Opcode::FPSaturate32},
        {IR::Opcode::FPTrunc16, IR::Opcode::FPTrunc32},
        // Composite construction and extraction
        {IR::Opcode::CompositeConstructF16x2, IR::Opcode::CompositeConstructF32x2},
        {IR::Opcode::CompositeConstructF16x3, IR::Opcode::CompositeConstructF32x3},
        {IR::Opcode::CompositeConstructF16x4, IR::Opcode::CompositeConstructF32x4},
        {IR::Opcode::CompositeExtractF16x2, IR::Opcode::CompositeExtractF32x2},
        {IR::Opcode::CompositeExtractF16x3, IR::Opcode::CompositeExtractF32x3},
        {IR::Opcode::CompositeExtractF16x4, IR::Opcode::CompositeExtractF32x4},
        // Conversions with an F16 operand
        {IR::Opcode::ConvertS16F16, IR::Opcode::ConvertS16F32},
        {IR::Opcode::ConvertS32F16, IR::Opcode::ConvertS32F32},
        {IR::Opcode::ConvertS64F16, IR::Opcode::ConvertS64F32},
        {IR::Opcode::ConvertU16F16, IR::Opcode::ConvertU16F32},
        {IR::Opcode::ConvertU32F16, IR::Opcode::ConvertU32F32},
        {IR::Opcode::ConvertU64F16, IR::Opcode::ConvertU64F32},
        // Packing and unpacking
        {IR::Opcode::PackFloat2x16, IR::Opcode::PackHalf2x16},
        {IR::Opcode::UnpackFloat2x16, IR::Opcode::UnpackHalf2x16},
    };
    for (const Lowering& entry : lowerings) {
        if (entry.from == op) {
            return entry.to;
        }
    }
    // Not in the table: hand the opcode back untouched
    return op;
}
} // Anonymous namespace
/// Rewrites the opcode of every instruction in the program through Replace().
///
/// Walks every function in @p program, every block of each function and every
/// instruction of each block.
///
/// @param program Program whose instructions are rewritten in place.
void LowerFp16ToFp32(IR::Program& program) {
    const auto lower_block{[](IR::Block& target) {
        for (IR::Inst& instruction : target.Instructions()) {
            // ReplaceOpcode is called for every instruction, including those whose
            // opcode Replace() returns unchanged
            const IR::Opcode lowered{Replace(instruction.Opcode())};
            instruction.ReplaceOpcode(lowered);
        }
    }};
    for (IR::Function& func : program.functions) {
        for (IR::Block* const current : func.blocks) {
            lower_block(*current);
        }
    }
}
} // namespace Shader::Optimization

View file

@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block);
void DeadCodeEliminationPass(IR::Block& block);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Function& function);
void LowerFp16ToFp32(IR::Program& program);
void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
void VerificationPass(const IR::Function& function);