shader: Partial implementation of LDC

This commit is contained in:
ReinUsesLisp 2021-03-09 17:14:57 -03:00 committed by ameerj
parent ab46371247
commit 3a63fa0477
16 changed files with 405 additions and 50 deletions

View file

@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
void VisitUsages(Info& info, IR::Inst& inst) {
switch (inst.Opcode()) {
case IR::Opcode::WorkgroupId:
info.uses_workgroup_id = true;
break;
case IR::Opcode::LocalInvocationId:
info.uses_local_invocation_id = true;
break;
case IR::Opcode::CompositeConstructF16x2:
case IR::Opcode::CompositeConstructF16x3:
case IR::Opcode::CompositeConstructF16x4:
case IR::Opcode::CompositeExtractF16x2:
case IR::Opcode::CompositeExtractF16x3:
case IR::Opcode::CompositeExtractF16x4:
case IR::Opcode::SelectF16:
case IR::Opcode::BitCastU16F16:
case IR::Opcode::BitCastF16U16:
case IR::Opcode::PackFloat2x16:
@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::FPTrunc64:
info.uses_fp64 = true;
break;
case IR::Opcode::GetCbuf:
default:
break;
}
switch (inst.Opcode()) {
case IR::Opcode::GetCbufU8:
case IR::Opcode::GetCbufS8:
case IR::Opcode::UndefU8:
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS8:
case IR::Opcode::WriteGlobalU8:
case IR::Opcode::WriteGlobalS8:
case IR::Opcode::LoadStorageU8:
case IR::Opcode::LoadStorageS8:
case IR::Opcode::WriteStorageU8:
case IR::Opcode::WriteStorageS8:
case IR::Opcode::SelectU8:
info.uses_int8 = true;
break;
default:
break;
}
switch (inst.Opcode()) {
case IR::Opcode::GetCbufU16:
case IR::Opcode::GetCbufS16:
case IR::Opcode::UndefU16:
case IR::Opcode::LoadGlobalU16:
case IR::Opcode::LoadGlobalS16:
case IR::Opcode::WriteGlobalU16:
case IR::Opcode::WriteGlobalS16:
case IR::Opcode::LoadStorageU16:
case IR::Opcode::LoadStorageS16:
case IR::Opcode::WriteStorageU16:
case IR::Opcode::WriteStorageS16:
case IR::Opcode::SelectU16:
case IR::Opcode::BitCastU16F16:
case IR::Opcode::BitCastF16U16:
case IR::Opcode::ConvertS16F16:
case IR::Opcode::ConvertS16F32:
case IR::Opcode::ConvertS16F64:
case IR::Opcode::ConvertU16F16:
case IR::Opcode::ConvertU16F32:
case IR::Opcode::ConvertU16F64:
info.uses_int16 = true;
break;
default:
break;
}
switch (inst.Opcode()) {
case IR::Opcode::GetCbufU64:
case IR::Opcode::UndefU64:
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS8:
case IR::Opcode::LoadGlobalU16:
case IR::Opcode::LoadGlobalS16:
case IR::Opcode::LoadGlobal32:
case IR::Opcode::LoadGlobal64:
case IR::Opcode::LoadGlobal128:
case IR::Opcode::WriteGlobalU8:
case IR::Opcode::WriteGlobalS8:
case IR::Opcode::WriteGlobalU16:
case IR::Opcode::WriteGlobalS16:
case IR::Opcode::WriteGlobal32:
case IR::Opcode::WriteGlobal64:
case IR::Opcode::WriteGlobal128:
case IR::Opcode::SelectU64:
case IR::Opcode::BitCastU64F64:
case IR::Opcode::BitCastF64U64:
case IR::Opcode::PackUint2x32:
case IR::Opcode::UnpackUint2x32:
case IR::Opcode::IAdd64:
case IR::Opcode::ISub64:
case IR::Opcode::INeg64:
case IR::Opcode::ShiftLeftLogical64:
case IR::Opcode::ShiftRightLogical64:
case IR::Opcode::ShiftRightArithmetic64:
case IR::Opcode::ConvertS64F16:
case IR::Opcode::ConvertS64F32:
case IR::Opcode::ConvertS64F64:
case IR::Opcode::ConvertU64F16:
case IR::Opcode::ConvertU64F32:
case IR::Opcode::ConvertU64F64:
case IR::Opcode::ConvertU64U32:
case IR::Opcode::ConvertU32U64:
case IR::Opcode::ConvertF16U64:
case IR::Opcode::ConvertF32U64:
case IR::Opcode::ConvertF64U64:
info.uses_int64 = true;
break;
default:
break;
}
switch (inst.Opcode()) {
case IR::Opcode::WorkgroupId:
info.uses_workgroup_id = true;
break;
case IR::Opcode::LocalInvocationId:
info.uses_local_invocation_id = true;
break;
case IR::Opcode::GetCbufU8:
case IR::Opcode::GetCbufS8:
case IR::Opcode::GetCbufU16:
case IR::Opcode::GetCbufS16:
case IR::Opcode::GetCbufU32:
case IR::Opcode::GetCbufF32:
case IR::Opcode::GetCbufU64: {
if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
AddConstantBufferDescriptor(info, index.U32(), 1);
} else {
throw NotImplementedException("Constant buffer with non-immediate index");
}
switch (inst.Opcode()) {
case IR::Opcode::GetCbufU8:
case IR::Opcode::GetCbufS8:
info.used_constant_buffer_types |= IR::Type::U8;
break;
case IR::Opcode::GetCbufU16:
case IR::Opcode::GetCbufS16:
info.used_constant_buffer_types |= IR::Type::U16;
break;
case IR::Opcode::GetCbufU32:
info.used_constant_buffer_types |= IR::Type::U32;
break;
case IR::Opcode::GetCbufF32:
info.used_constant_buffer_types |= IR::Type::F32;
break;
case IR::Opcode::GetCbufU64:
info.used_constant_buffer_types |= IR::Type::U64;
break;
default:
break;
}
break;
}
case IR::Opcode::BindlessImageSampleImplicitLod:
case IR::Opcode::BindlessImageSampleExplicitLod:
case IR::Opcode::BindlessImageSampleDrefImplicitLod:

View file

@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) {
// ISub32 is generally used to subtract two constant buffers, compare and replace this with
// zero if they are equal.
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf &&
return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
}};
IR::Inst* op_a{inst.Arg(0).InstRecursive()};
@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) {
// Canonicalize local variables to simplify the following logic
std::swap(op_a, op_b);
}
if (op_b->Opcode() != IR::Opcode::GetCbuf) {
if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
return;
}
IR::Inst* const inst_cbuf{op_b};
@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) {
}
}
template <typename Dest, typename Source>
template <IR::Opcode op, typename Dest, typename Source>
void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
const IR::Value value{inst.Arg(0)};
if (value.IsImmediate()) {
@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
return;
}
IR::Inst* const arg_inst{value.InstRecursive()};
if (value.InstRecursive()->Opcode() == reverse) {
if (arg_inst->Opcode() == reverse) {
inst.ReplaceUsesWith(arg_inst->Arg(0));
return;
}
if constexpr (op == IR::Opcode::BitCastF32U32) {
if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
// Replace the bitcast with a typed constant buffer read
inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
inst.SetArg(0, arg_inst->Arg(0));
inst.SetArg(1, arg_inst->Arg(1));
return;
}
}
}
@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::ISub32:
return FoldISub32(inst);
case IR::Opcode::BitCastF32U32:
return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
case IR::Opcode::BitCastU32F32:
return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
case IR::Opcode::IAdd64:
return FoldAdd<u64>(block, inst);
case IR::Opcode::SelectU32:

View file

@ -203,7 +203,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
return std::nullopt;
}
const IR::Inst* const inst{value.InstRecursive()};
if (inst->Opcode() == IR::Opcode::GetCbuf) {
if (inst->Opcode() == IR::Opcode::GetCbufU32) {
const IR::Value index{inst->Arg(0)};
const IR::Value offset{inst->Arg(1)};
if (!index.IsImmediate()) {

View file

@ -78,7 +78,7 @@ std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value,
return std::nullopt;
}
const IR::Inst* const inst{value.InstRecursive()};
if (inst->Opcode() == IR::Opcode::GetCbuf) {
if (inst->Opcode() == IR::Opcode::GetCbufU32) {
const IR::Value index{inst->Arg(0)};
const IR::Value offset{inst->Arg(1)};
if (!index.IsImmediate()) {