shader: Implement ATOM/S and RED

This commit is contained in:
ameerj 2021-04-11 02:07:02 -04:00
parent 479ca00071
commit 3db2b3effa
21 changed files with 1745 additions and 19 deletions

View file

@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::FPOrdGreaterThanEqual16:
case IR::Opcode::FPUnordGreaterThanEqual16:
case IR::Opcode::FPIsNan16:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::StorageAtomicAddF16x2:
case IR::Opcode::StorageAtomicMinF16x2:
case IR::Opcode::StorageAtomicMaxF16x2:
info.uses_fp16 = true;
break;
case IR::Opcode::CompositeConstructF64x2:
@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::ConvertF16U64:
case IR::Opcode::ConvertF32U64:
case IR::Opcode::ConvertF64U64:
case IR::Opcode::SharedAtomicExchange64:
info.uses_int64 = true;
break;
default:
@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::FSwizzleAdd:
info.uses_fswzadd = true;
break;
case IR::Opcode::SharedAtomicInc32:
info.uses_shared_increment = true;
break;
case IR::Opcode::SharedAtomicDec32:
info.uses_shared_decrement = true;
break;
case IR::Opcode::GlobalAtomicInc32:
case IR::Opcode::StorageAtomicInc32:
info.uses_global_increment = true;
break;
case IR::Opcode::GlobalAtomicDec32:
case IR::Opcode::StorageAtomicDec32:
info.uses_global_decrement = true;
break;
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::StorageAtomicAddF32:
info.uses_atomic_f32_add = true;
break;
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::StorageAtomicAddF16x2:
info.uses_atomic_f16x2_add = true;
break;
case IR::Opcode::GlobalAtomicAddF32x2:
case IR::Opcode::StorageAtomicAddF32x2:
info.uses_atomic_f32x2_add = true;
break;
case IR::Opcode::GlobalAtomicMinF16x2:
case IR::Opcode::StorageAtomicMinF16x2:
info.uses_atomic_f16x2_min = true;
break;
case IR::Opcode::GlobalAtomicMinF32x2:
case IR::Opcode::StorageAtomicMinF32x2:
info.uses_atomic_f32x2_min = true;
break;
case IR::Opcode::GlobalAtomicMaxF16x2:
case IR::Opcode::StorageAtomicMaxF16x2:
info.uses_atomic_f16x2_max = true;
break;
case IR::Opcode::GlobalAtomicMaxF32x2:
case IR::Opcode::StorageAtomicMaxF32x2:
info.uses_atomic_f32x2_max = true;
break;
case IR::Opcode::GlobalAtomicIAdd64:
case IR::Opcode::GlobalAtomicSMin64:
case IR::Opcode::GlobalAtomicUMin64:
case IR::Opcode::GlobalAtomicSMax64:
case IR::Opcode::GlobalAtomicUMax64:
case IR::Opcode::GlobalAtomicAnd64:
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::StorageAtomicIAdd64:
case IR::Opcode::StorageAtomicSMin64:
case IR::Opcode::StorageAtomicUMin64:
case IR::Opcode::StorageAtomicSMax64:
case IR::Opcode::StorageAtomicUMax64:
case IR::Opcode::StorageAtomicAnd64:
case IR::Opcode::StorageAtomicOr64:
case IR::Opcode::StorageAtomicXor64:
info.uses_64_bit_atomics = true;
break;
case IR::Opcode::SharedAtomicExchange64:
info.uses_64_bit_atomics = true;
info.uses_shared_memory_u32x2 = true;
break;
default:
break;
}

View file

@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) {
case IR::Opcode::WriteGlobal32:
case IR::Opcode::WriteGlobal64:
case IR::Opcode::WriteGlobal128:
case IR::Opcode::GlobalAtomicIAdd32:
case IR::Opcode::GlobalAtomicSMin32:
case IR::Opcode::GlobalAtomicUMin32:
case IR::Opcode::GlobalAtomicSMax32:
case IR::Opcode::GlobalAtomicUMax32:
case IR::Opcode::GlobalAtomicInc32:
case IR::Opcode::GlobalAtomicDec32:
case IR::Opcode::GlobalAtomicAnd32:
case IR::Opcode::GlobalAtomicOr32:
case IR::Opcode::GlobalAtomicXor32:
case IR::Opcode::GlobalAtomicExchange32:
case IR::Opcode::GlobalAtomicIAdd64:
case IR::Opcode::GlobalAtomicSMin64:
case IR::Opcode::GlobalAtomicUMin64:
case IR::Opcode::GlobalAtomicSMax64:
case IR::Opcode::GlobalAtomicUMax64:
case IR::Opcode::GlobalAtomicAnd64:
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
case IR::Opcode::GlobalAtomicMinF16x2:
case IR::Opcode::GlobalAtomicMinF32x2:
case IR::Opcode::GlobalAtomicMaxF16x2:
case IR::Opcode::GlobalAtomicMaxF32x2:
return true;
default:
return false;
@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
return IR::Opcode::WriteStorage64;
case IR::Opcode::WriteGlobal128:
return IR::Opcode::WriteStorage128;
case IR::Opcode::GlobalAtomicIAdd32:
return IR::Opcode::StorageAtomicIAdd32;
case IR::Opcode::GlobalAtomicSMin32:
return IR::Opcode::StorageAtomicSMin32;
case IR::Opcode::GlobalAtomicUMin32:
return IR::Opcode::StorageAtomicUMin32;
case IR::Opcode::GlobalAtomicSMax32:
return IR::Opcode::StorageAtomicSMax32;
case IR::Opcode::GlobalAtomicUMax32:
return IR::Opcode::StorageAtomicUMax32;
case IR::Opcode::GlobalAtomicInc32:
return IR::Opcode::StorageAtomicInc32;
case IR::Opcode::GlobalAtomicDec32:
return IR::Opcode::StorageAtomicDec32;
case IR::Opcode::GlobalAtomicAnd32:
return IR::Opcode::StorageAtomicAnd32;
case IR::Opcode::GlobalAtomicOr32:
return IR::Opcode::StorageAtomicOr32;
case IR::Opcode::GlobalAtomicXor32:
return IR::Opcode::StorageAtomicXor32;
case IR::Opcode::GlobalAtomicIAdd64:
return IR::Opcode::StorageAtomicIAdd64;
case IR::Opcode::GlobalAtomicSMin64:
return IR::Opcode::StorageAtomicSMin64;
case IR::Opcode::GlobalAtomicUMin64:
return IR::Opcode::StorageAtomicUMin64;
case IR::Opcode::GlobalAtomicSMax64:
return IR::Opcode::StorageAtomicSMax64;
case IR::Opcode::GlobalAtomicUMax64:
return IR::Opcode::StorageAtomicUMax64;
case IR::Opcode::GlobalAtomicAnd64:
return IR::Opcode::StorageAtomicAnd64;
case IR::Opcode::GlobalAtomicOr64:
return IR::Opcode::StorageAtomicOr64;
case IR::Opcode::GlobalAtomicXor64:
return IR::Opcode::StorageAtomicXor64;
case IR::Opcode::GlobalAtomicExchange32:
return IR::Opcode::StorageAtomicExchange32;
case IR::Opcode::GlobalAtomicExchange64:
return IR::Opcode::StorageAtomicExchange64;
case IR::Opcode::GlobalAtomicAddF32:
return IR::Opcode::StorageAtomicAddF32;
case IR::Opcode::GlobalAtomicAddF16x2:
return IR::Opcode::StorageAtomicAddF16x2;
case IR::Opcode::GlobalAtomicMinF16x2:
return IR::Opcode::StorageAtomicMinF16x2;
case IR::Opcode::GlobalAtomicMaxF16x2:
return IR::Opcode::StorageAtomicMaxF16x2;
case IR::Opcode::GlobalAtomicAddF32x2:
return IR::Opcode::StorageAtomicAddF32x2;
case IR::Opcode::GlobalAtomicMinF32x2:
return IR::Opcode::StorageAtomicMinF32x2;
case IR::Opcode::GlobalAtomicMaxF32x2:
return IR::Opcode::StorageAtomicMaxF32x2;
default:
throw InvalidArgument("Invalid global memory opcode {}", opcode);
}
@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
inst.Invalidate();
}
/// Swap a global memory atomic instruction for its storage buffer counterpart.
///
/// The storage opcode is looked up with GlobalToStorage (which throws on opcodes it does not
/// know). A new instruction carrying {storage_index, offset, inst.Arg(1)} is created through
/// block.PrependNewInst at the position of `inst`, and every use of `inst` is then redirected
/// to the result of that new instruction.
/// NOTE(review): argument 0 of the original instruction is not forwarded; presumably it is the
/// global address that storage_index/offset stand in for - confirm against the opcode table.
void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                   const IR::U32& offset) {
    // Resolve the opcode first so an unsupported instruction fails before the block is touched.
    const IR::Opcode storage_opcode = GlobalToStorage(inst.GetOpcode());
    const auto insert_point = IR::Block::InstructionList::s_iterator_to(inst);
    const auto storage_atomic =
        block.PrependNewInst(insert_point, storage_opcode, {storage_index, offset, inst.Arg(1)});
    const IR::Value result{&*storage_atomic};
    inst.ReplaceUsesWith(result);
}
/// Replace a global memory instruction with its storage buffer equivalent
void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
const IR::U32& offset) {
@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
case IR::Opcode::WriteGlobal64:
case IR::Opcode::WriteGlobal128:
return ReplaceWrite(block, inst, storage_index, offset);
case IR::Opcode::GlobalAtomicIAdd32:
case IR::Opcode::GlobalAtomicSMin32:
case IR::Opcode::GlobalAtomicUMin32:
case IR::Opcode::GlobalAtomicSMax32:
case IR::Opcode::GlobalAtomicUMax32:
case IR::Opcode::GlobalAtomicInc32:
case IR::Opcode::GlobalAtomicDec32:
case IR::Opcode::GlobalAtomicAnd32:
case IR::Opcode::GlobalAtomicOr32:
case IR::Opcode::GlobalAtomicXor32:
case IR::Opcode::GlobalAtomicExchange32:
case IR::Opcode::GlobalAtomicIAdd64:
case IR::Opcode::GlobalAtomicSMin64:
case IR::Opcode::GlobalAtomicUMin64:
case IR::Opcode::GlobalAtomicSMax64:
case IR::Opcode::GlobalAtomicUMax64:
case IR::Opcode::GlobalAtomicAnd64:
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
case IR::Opcode::GlobalAtomicMinF16x2:
case IR::Opcode::GlobalAtomicMinF32x2:
case IR::Opcode::GlobalAtomicMaxF16x2:
case IR::Opcode::GlobalAtomicMaxF32x2:
return ReplaceAtomic(block, inst, storage_index, offset);
default:
throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
}
@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
CollectStorageBuffers(*block, inst, info);
}
}
u32 storage_index{};
for (const StorageBufferAddr& storage_buffer : info.set) {
program.info.storage_buffers_descriptors.push_back({
.cbuf_index = storage_buffer.index,
@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
.count = 1,
.is_written{info.writes.contains(storage_buffer)},
});
++storage_index;
}
for (const StorageInst& storage_inst : info.to_replace) {
const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};

View file

@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) {
return IR::Opcode::ConvertF32U32;
case IR::Opcode::ConvertF16U64:
return IR::Opcode::ConvertF32U64;
case IR::Opcode::GlobalAtomicAddF16x2:
return IR::Opcode::GlobalAtomicAddF32x2;
case IR::Opcode::StorageAtomicAddF16x2:
return IR::Opcode::StorageAtomicAddF32x2;
case IR::Opcode::GlobalAtomicMinF16x2:
return IR::Opcode::GlobalAtomicMinF32x2;
case IR::Opcode::StorageAtomicMinF16x2:
return IR::Opcode::StorageAtomicMinF32x2;
case IR::Opcode::GlobalAtomicMaxF16x2:
return IR::Opcode::GlobalAtomicMaxF32x2;
case IR::Opcode::StorageAtomicMaxF16x2:
return IR::Opcode::StorageAtomicMaxF32x2;
default:
return op;
}