mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-26 12:26:18 +00:00
Handle DS_READ_U16, DS_WRITE_B16, DS_ADD_U64 (#3007)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
* Handle DS_READ_U16 & DS_WRITE_B16 * Refactor DS translation * Translate DS_ADD_U64 * format * Fix RingAccessElimination after changing WriteShared64 type * Simplify bounds checking in generated SPIR-V
This commit is contained in:
parent
a71bfb30a2
commit
217d32b502
19 changed files with 323 additions and 89 deletions
|
@ -293,10 +293,12 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
|
|||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 16:
|
||||
return Inst<U16>(Opcode::LoadSharedU16, offset);
|
||||
case 32:
|
||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||
case 64:
|
||||
return Inst(Opcode::LoadSharedU64, offset);
|
||||
return Inst<U64>(Opcode::LoadSharedU64, offset);
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||
}
|
||||
|
@ -304,6 +306,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
|||
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 16:
|
||||
Inst(Opcode::WriteSharedU16, offset, value);
|
||||
break;
|
||||
case 32:
|
||||
Inst(Opcode::WriteSharedU32, offset, value);
|
||||
break;
|
||||
|
@ -315,10 +320,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
|
|||
}
|
||||
}
|
||||
|
||||
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
|
||||
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ public:
|
|||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
|
||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
|
||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
|
||||
|
|
|
@ -30,13 +30,16 @@ OPCODE(EmitVertex, Void,
|
|||
OPCODE(EmitPrimitive, Void, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU16, U16, U32, )
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
OPCODE(LoadSharedU64, U64, U32, )
|
||||
OPCODE(WriteSharedU16, Void, U32, U16, )
|
||||
OPCODE(WriteSharedU32, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU64, Void, U32, U32x2, )
|
||||
OPCODE(WriteSharedU64, Void, U32, U64, )
|
||||
|
||||
// Shared atomic operations
|
||||
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
||||
|
@ -116,6 +119,7 @@ OPCODE(StoreBufferFormatF32, Void, Opaq
|
|||
|
||||
// Buffer atomic operations
|
||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
|
||||
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
|
||||
|
|
|
@ -39,11 +39,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||
ASSERT(addr->Arg(1).IsImmediate());
|
||||
offset = addr->Arg(1).U32();
|
||||
}
|
||||
IR::Value data = inst.Arg(1).Resolve();
|
||||
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
|
||||
: inst.Arg(1).Resolve();
|
||||
for (s32 i = 0; i < num_components; i++) {
|
||||
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
||||
const auto comp = (offset / 4) % 4;
|
||||
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
|
||||
const IR::U32 value =
|
||||
IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
|
||||
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
||||
offset += 4;
|
||||
}
|
||||
|
|
|
@ -34,8 +34,10 @@ void Visit(Info& info, const IR::Inst& inst) {
|
|||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
info.uses_shared = true;
|
||||
|
|
|
@ -16,6 +16,7 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
|||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
case IR::Opcode::SharedAtomicSMax32:
|
||||
case IR::Opcode::SharedAtomicUMax32:
|
||||
|
@ -33,9 +34,11 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||
if (program.info.stage != Stage::Compute) {
|
||||
return;
|
||||
}
|
||||
// Only perform the transform if the host shared memory is insufficient.
|
||||
// Only perform the transform if the host shared memory is insufficient
|
||||
// or the device does not support VK_KHR_workgroup_memory_explicit_layout
|
||||
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||
if (shared_memory_size <= profile.max_shared_memory_size) {
|
||||
if (shared_memory_size <= profile.max_shared_memory_size &&
|
||||
profile.supports_workgroup_explicit_memory_layout) {
|
||||
return;
|
||||
}
|
||||
// Add buffer binding for shared memory storage buffer.
|
||||
|
@ -60,6 +63,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
|
||||
continue;
|
||||
|
@ -93,12 +97,19 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||
ir.Imm32(shared_memory_size));
|
||||
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
ir.StoreBufferU16(handle, address, IR::U32{inst.Arg(1)}, {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
|
||||
inst.Invalidate();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue