Handle DS_READ_U16, DS_WRITE_B16, DS_ADD_U64 (#3007)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions

* Handle DS_READ_U16 & DS_WRITE_B16

* Refactor DS translation

* Translate DS_ADD_U64

* Fix formatting

* Fix RingAccessElimination after changing WriteShared64 type

* Simplify bounds checking in generated SPIR-V
This commit is contained in:
Marcin Mikołajczyk 2025-06-09 21:03:38 +02:00 committed by GitHub
parent a71bfb30a2
commit 217d32b502
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 323 additions and 89 deletions

View file

@ -293,10 +293,12 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) {
case 16:
return Inst<U16>(Opcode::LoadSharedU16, offset);
case 32:
return Inst<U32>(Opcode::LoadSharedU32, offset);
case 64:
return Inst(Opcode::LoadSharedU64, offset);
return Inst<U64>(Opcode::LoadSharedU64, offset);
default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
@ -304,6 +306,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
switch (bit_size) {
case 16:
Inst(Opcode::WriteSharedU16, offset, value);
break;
case 32:
Inst(Opcode::WriteSharedU32, offset, value);
break;
@ -315,10 +320,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
}
}
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
default:
ThrowInvalidType(data.Type());
}

View file

@ -99,7 +99,7 @@ public:
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);

View file

@ -30,13 +30,16 @@ OPCODE(EmitVertex, Void,
OPCODE(EmitPrimitive, Void, )
// Shared memory operations
OPCODE(LoadSharedU16, U16, U32, )
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(LoadSharedU64, U64, U32, )
OPCODE(WriteSharedU16, Void, U32, U16, )
OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU64, Void, U32, U64, )
// Shared atomic operations
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
@ -116,6 +119,7 @@ OPCODE(StoreBufferFormatF32, Void, Opaq
// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )

View file

@ -39,11 +39,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32();
}
IR::Value data = inst.Arg(1).Resolve();
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
: inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) {
const auto attrib = IR::Attribute::Param0 + (offset / 16);
const auto comp = (offset / 4) % 4;
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
const IR::U32 value =
IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
offset += 4;
}

View file

@ -34,8 +34,10 @@ void Visit(Info& info, const IR::Inst& inst) {
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
break;
}
case IR::Opcode::LoadSharedU16:
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU16:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
info.uses_shared = true;

View file

@ -16,6 +16,7 @@ static bool IsSharedAccess(const IR::Inst& inst) {
case IR::Opcode::WriteSharedU64:
case IR::Opcode::SharedAtomicAnd32:
case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64:
case IR::Opcode::SharedAtomicOr32:
case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32:
@ -33,9 +34,11 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
if (program.info.stage != Stage::Compute) {
return;
}
// Only perform the transform if the host shared memory is insufficient.
// Only perform the transform if the host shared memory is insufficient
// or the device does not support VK_KHR_workgroup_memory_explicit_layout
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size <= profile.max_shared_memory_size) {
if (shared_memory_size <= profile.max_shared_memory_size &&
profile.supports_workgroup_explicit_memory_layout) {
return;
}
// Add buffer binding for shared memory storage buffer.
@ -60,6 +63,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
continue;
case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64:
inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
continue;
@ -93,12 +97,19 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
ir.Imm32(shared_memory_size));
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU16:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));
break;
case IR::Opcode::LoadSharedU32:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
break;
case IR::Opcode::LoadSharedU64:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
break;
case IR::Opcode::WriteSharedU16:
ir.StoreBufferU16(handle, address, IR::U32{inst.Arg(1)}, {});
inst.Invalidate();
break;
case IR::Opcode::WriteSharedU32:
ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
inst.Invalidate();