shader: Partial implementation of LDC
This commit is contained in:
parent
ab46371247
commit
3a63fa0477
16 changed files with 405 additions and 50 deletions
|
@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
|
|||
|
||||
void VisitUsages(Info& info, IR::Inst& inst) {
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::WorkgroupId:
|
||||
info.uses_workgroup_id = true;
|
||||
break;
|
||||
case IR::Opcode::LocalInvocationId:
|
||||
info.uses_local_invocation_id = true;
|
||||
break;
|
||||
case IR::Opcode::CompositeConstructF16x2:
|
||||
case IR::Opcode::CompositeConstructF16x3:
|
||||
case IR::Opcode::CompositeConstructF16x4:
|
||||
case IR::Opcode::CompositeExtractF16x2:
|
||||
case IR::Opcode::CompositeExtractF16x3:
|
||||
case IR::Opcode::CompositeExtractF16x4:
|
||||
case IR::Opcode::SelectF16:
|
||||
case IR::Opcode::BitCastU16F16:
|
||||
case IR::Opcode::BitCastF16U16:
|
||||
case IR::Opcode::PackFloat2x16:
|
||||
|
@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::FPTrunc64:
|
||||
info.uses_fp64 = true;
|
||||
break;
|
||||
case IR::Opcode::GetCbuf:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::GetCbufU8:
|
||||
case IR::Opcode::GetCbufS8:
|
||||
case IR::Opcode::UndefU8:
|
||||
case IR::Opcode::LoadGlobalU8:
|
||||
case IR::Opcode::LoadGlobalS8:
|
||||
case IR::Opcode::WriteGlobalU8:
|
||||
case IR::Opcode::WriteGlobalS8:
|
||||
case IR::Opcode::LoadStorageU8:
|
||||
case IR::Opcode::LoadStorageS8:
|
||||
case IR::Opcode::WriteStorageU8:
|
||||
case IR::Opcode::WriteStorageS8:
|
||||
case IR::Opcode::SelectU8:
|
||||
info.uses_int8 = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::GetCbufU16:
|
||||
case IR::Opcode::GetCbufS16:
|
||||
case IR::Opcode::UndefU16:
|
||||
case IR::Opcode::LoadGlobalU16:
|
||||
case IR::Opcode::LoadGlobalS16:
|
||||
case IR::Opcode::WriteGlobalU16:
|
||||
case IR::Opcode::WriteGlobalS16:
|
||||
case IR::Opcode::LoadStorageU16:
|
||||
case IR::Opcode::LoadStorageS16:
|
||||
case IR::Opcode::WriteStorageU16:
|
||||
case IR::Opcode::WriteStorageS16:
|
||||
case IR::Opcode::SelectU16:
|
||||
case IR::Opcode::BitCastU16F16:
|
||||
case IR::Opcode::BitCastF16U16:
|
||||
case IR::Opcode::ConvertS16F16:
|
||||
case IR::Opcode::ConvertS16F32:
|
||||
case IR::Opcode::ConvertS16F64:
|
||||
case IR::Opcode::ConvertU16F16:
|
||||
case IR::Opcode::ConvertU16F32:
|
||||
case IR::Opcode::ConvertU16F64:
|
||||
info.uses_int16 = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::GetCbufU64:
|
||||
case IR::Opcode::UndefU64:
|
||||
case IR::Opcode::LoadGlobalU8:
|
||||
case IR::Opcode::LoadGlobalS8:
|
||||
case IR::Opcode::LoadGlobalU16:
|
||||
case IR::Opcode::LoadGlobalS16:
|
||||
case IR::Opcode::LoadGlobal32:
|
||||
case IR::Opcode::LoadGlobal64:
|
||||
case IR::Opcode::LoadGlobal128:
|
||||
case IR::Opcode::WriteGlobalU8:
|
||||
case IR::Opcode::WriteGlobalS8:
|
||||
case IR::Opcode::WriteGlobalU16:
|
||||
case IR::Opcode::WriteGlobalS16:
|
||||
case IR::Opcode::WriteGlobal32:
|
||||
case IR::Opcode::WriteGlobal64:
|
||||
case IR::Opcode::WriteGlobal128:
|
||||
case IR::Opcode::SelectU64:
|
||||
case IR::Opcode::BitCastU64F64:
|
||||
case IR::Opcode::BitCastF64U64:
|
||||
case IR::Opcode::PackUint2x32:
|
||||
case IR::Opcode::UnpackUint2x32:
|
||||
case IR::Opcode::IAdd64:
|
||||
case IR::Opcode::ISub64:
|
||||
case IR::Opcode::INeg64:
|
||||
case IR::Opcode::ShiftLeftLogical64:
|
||||
case IR::Opcode::ShiftRightLogical64:
|
||||
case IR::Opcode::ShiftRightArithmetic64:
|
||||
case IR::Opcode::ConvertS64F16:
|
||||
case IR::Opcode::ConvertS64F32:
|
||||
case IR::Opcode::ConvertS64F64:
|
||||
case IR::Opcode::ConvertU64F16:
|
||||
case IR::Opcode::ConvertU64F32:
|
||||
case IR::Opcode::ConvertU64F64:
|
||||
case IR::Opcode::ConvertU64U32:
|
||||
case IR::Opcode::ConvertU32U64:
|
||||
case IR::Opcode::ConvertF16U64:
|
||||
case IR::Opcode::ConvertF32U64:
|
||||
case IR::Opcode::ConvertF64U64:
|
||||
info.uses_int64 = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::WorkgroupId:
|
||||
info.uses_workgroup_id = true;
|
||||
break;
|
||||
case IR::Opcode::LocalInvocationId:
|
||||
info.uses_local_invocation_id = true;
|
||||
break;
|
||||
case IR::Opcode::GetCbufU8:
|
||||
case IR::Opcode::GetCbufS8:
|
||||
case IR::Opcode::GetCbufU16:
|
||||
case IR::Opcode::GetCbufS16:
|
||||
case IR::Opcode::GetCbufU32:
|
||||
case IR::Opcode::GetCbufF32:
|
||||
case IR::Opcode::GetCbufU64: {
|
||||
if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
|
||||
AddConstantBufferDescriptor(info, index.U32(), 1);
|
||||
} else {
|
||||
throw NotImplementedException("Constant buffer with non-immediate index");
|
||||
}
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::GetCbufU8:
|
||||
case IR::Opcode::GetCbufS8:
|
||||
info.used_constant_buffer_types |= IR::Type::U8;
|
||||
break;
|
||||
case IR::Opcode::GetCbufU16:
|
||||
case IR::Opcode::GetCbufS16:
|
||||
info.used_constant_buffer_types |= IR::Type::U16;
|
||||
break;
|
||||
case IR::Opcode::GetCbufU32:
|
||||
info.used_constant_buffer_types |= IR::Type::U32;
|
||||
break;
|
||||
case IR::Opcode::GetCbufF32:
|
||||
info.used_constant_buffer_types |= IR::Type::F32;
|
||||
break;
|
||||
case IR::Opcode::GetCbufU64:
|
||||
info.used_constant_buffer_types |= IR::Type::U64;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleExplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefImplicitLod:
|
||||
|
|
|
@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) {
|
|||
// ISub32 is generally used to subtract two constant buffers, compare and replace this with
|
||||
// zero if they are equal.
|
||||
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
|
||||
return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf &&
|
||||
return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
|
||||
a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
|
||||
}};
|
||||
IR::Inst* op_a{inst.Arg(0).InstRecursive()};
|
||||
|
@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) {
|
|||
// Canonicalize local variables to simplify the following logic
|
||||
std::swap(op_a, op_b);
|
||||
}
|
||||
if (op_b->Opcode() != IR::Opcode::GetCbuf) {
|
||||
if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
|
||||
return;
|
||||
}
|
||||
IR::Inst* const inst_cbuf{op_b};
|
||||
|
@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
template <typename Dest, typename Source>
|
||||
template <IR::Opcode op, typename Dest, typename Source>
|
||||
void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||
const IR::Value value{inst.Arg(0)};
|
||||
if (value.IsImmediate()) {
|
||||
|
@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
return;
|
||||
}
|
||||
IR::Inst* const arg_inst{value.InstRecursive()};
|
||||
if (value.InstRecursive()->Opcode() == reverse) {
|
||||
if (arg_inst->Opcode() == reverse) {
|
||||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||
return;
|
||||
}
|
||||
if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||
if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
|
||||
// Replace the bitcast with a typed constant buffer read
|
||||
inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::ISub32:
|
||||
return FoldISub32(inst);
|
||||
case IR::Opcode::BitCastF32U32:
|
||||
return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
|
||||
return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
|
||||
case IR::Opcode::BitCastU32F32:
|
||||
return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
|
||||
return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
|
||||
case IR::Opcode::IAdd64:
|
||||
return FoldAdd<u64>(block, inst);
|
||||
case IR::Opcode::SelectU32:
|
||||
|
|
|
@ -203,7 +203,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
|
|||
return std::nullopt;
|
||||
}
|
||||
const IR::Inst* const inst{value.InstRecursive()};
|
||||
if (inst->Opcode() == IR::Opcode::GetCbuf) {
|
||||
if (inst->Opcode() == IR::Opcode::GetCbufU32) {
|
||||
const IR::Value index{inst->Arg(0)};
|
||||
const IR::Value offset{inst->Arg(1)};
|
||||
if (!index.IsImmediate()) {
|
||||
|
|
|
@ -78,7 +78,7 @@ std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value,
|
|||
return std::nullopt;
|
||||
}
|
||||
const IR::Inst* const inst{value.InstRecursive()};
|
||||
if (inst->Opcode() == IR::Opcode::GetCbuf) {
|
||||
if (inst->Opcode() == IR::Opcode::GetCbufU32) {
|
||||
const IR::Value index{inst->Arg(0)};
|
||||
const IR::Value offset{inst->Arg(1)};
|
||||
if (!index.IsImmediate()) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue