shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available
This commit is contained in:
parent
84298ce191
commit
e860870dd2
20 changed files with 730 additions and 36 deletions
|
@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
|
|||
Inst(Opcode::WriteGlobal128, address, vector);
|
||||
}
|
||||
|
||||
U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
|
||||
return Inst<U32>(Opcode::LoadLocal, word_offset);
|
||||
}
|
||||
|
||||
void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
|
||||
Inst(Opcode::WriteLocal, word_offset, value);
|
||||
}
|
||||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
|
||||
case 16:
|
||||
return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
|
||||
case 32:
|
||||
return Inst(Opcode::LoadSharedU32, offset);
|
||||
case 64:
|
||||
return Inst(Opcode::LoadSharedU64, offset);
|
||||
case 128:
|
||||
return Inst(Opcode::LoadSharedU128, offset);
|
||||
}
|
||||
throw InvalidArgument("Invalid bit size {}", bit_size);
|
||||
}
|
||||
|
||||
void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
Inst(Opcode::WriteSharedU8, offset, value);
|
||||
break;
|
||||
case 16:
|
||||
Inst(Opcode::WriteSharedU16, offset, value);
|
||||
break;
|
||||
case 32:
|
||||
Inst(Opcode::WriteSharedU32, offset, value);
|
||||
break;
|
||||
case 64:
|
||||
Inst(Opcode::WriteSharedU64, offset, value);
|
||||
break;
|
||||
case 128:
|
||||
Inst(Opcode::WriteSharedU128, offset, value);
|
||||
break;
|
||||
default:
|
||||
throw InvalidArgument("Invalid bit size {}", bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
U1 IREmitter::GetZeroFromOp(const Value& op) {
|
||||
return Inst<U1>(Opcode::GetZeroFromOp, op);
|
||||
}
|
||||
|
|
|
@ -99,6 +99,12 @@ public:
|
|||
void WriteGlobal64(const U64& address, const IR::Value& vector);
|
||||
void WriteGlobal128(const U64& address, const IR::Value& vector);
|
||||
|
||||
[[nodiscard]] U32 LoadLocal(const U32& word_offset);
|
||||
void WriteLocal(const U32& word_offset, const U32& value);
|
||||
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const U32& offset, const Value& value);
|
||||
|
||||
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetSignFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
|
||||
|
|
|
@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::WriteStorage32:
|
||||
case Opcode::WriteStorage64:
|
||||
case Opcode::WriteStorage128:
|
||||
case Opcode::WriteLocal:
|
||||
case Opcode::WriteSharedU8:
|
||||
case Opcode::WriteSharedU16:
|
||||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::WriteSharedU128:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32,
|
|||
OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
|
||||
OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
|
||||
|
||||
// Local memory operations
|
||||
OPCODE(LoadLocal, U32, U32, )
|
||||
OPCODE(WriteLocal, Void, U32, U32, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU8, U32, U32, )
|
||||
OPCODE(LoadSharedS8, U32, U32, )
|
||||
OPCODE(LoadSharedU16, U32, U32, )
|
||||
OPCODE(LoadSharedS16, U32, U32, )
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
OPCODE(LoadSharedU128, U32x4, U32, )
|
||||
OPCODE(WriteSharedU8, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU16, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU32, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU64, Void, U32, U32x2, )
|
||||
OPCODE(WriteSharedU128, Void, U32, U32x4, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
|
||||
|
|
|
@ -21,6 +21,8 @@ struct Program {
|
|||
Info info;
|
||||
Stage stage{};
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
u32 local_memory_size{};
|
||||
u32 shared_memory_size{};
|
||||
};
|
||||
|
||||
[[nodiscard]] std::string DumpProgram(const Program& program);
|
||||
|
|
|
@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
|
||||
program.post_order_blocks = PostOrder(program.blocks);
|
||||
program.stage = env.ShaderStage();
|
||||
program.local_memory_size = env.LocalMemorySize();
|
||||
if (program.stage == Stage::Compute) {
|
||||
program.workgroup_size = env.WorkgroupSize();
|
||||
program.shared_memory_size = env.SharedMemorySize();
|
||||
}
|
||||
RemoveUnreachableBlocks(program);
|
||||
|
||||
|
|
|
@ -0,0 +1,197 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
enum class Size : u64 {
|
||||
U8,
|
||||
S8,
|
||||
U16,
|
||||
S16,
|
||||
B32,
|
||||
B64,
|
||||
B128,
|
||||
};
|
||||
|
||||
IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<8, 8, IR::Reg> offset_reg;
|
||||
BitField<20, 24, u64> absolute_offset;
|
||||
BitField<20, 24, s64> relative_offset;
|
||||
} const encoding{insn};
|
||||
|
||||
if (encoding.offset_reg == IR::Reg::RZ) {
|
||||
return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
|
||||
} else {
|
||||
const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
|
||||
return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<int, bool> GetSize(u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<48, 3, Size> size;
|
||||
} const encoding{insn};
|
||||
|
||||
const Size nnn = encoding.size;
|
||||
switch (encoding.size) {
|
||||
case Size::U8:
|
||||
return {8, false};
|
||||
case Size::S8:
|
||||
return {8, true};
|
||||
case Size::U16:
|
||||
return {16, false};
|
||||
case Size::S16:
|
||||
return {16, true};
|
||||
case Size::B32:
|
||||
return {32, false};
|
||||
case Size::B64:
|
||||
return {64, false};
|
||||
case Size::B128:
|
||||
return {128, false};
|
||||
default:
|
||||
throw NotImplementedException("Invalid size {}", encoding.size.Value());
|
||||
}
|
||||
}
|
||||
|
||||
IR::Reg Reg(u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<0, 8, IR::Reg> reg;
|
||||
} const encoding{insn};
|
||||
|
||||
return encoding.reg;
|
||||
}
|
||||
|
||||
IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
|
||||
return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
|
||||
}
|
||||
|
||||
IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
|
||||
return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void TranslatorVisitor::LDL(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
|
||||
|
||||
const IR::Reg dest{Reg(insn)};
|
||||
const auto [bit_size, is_signed]{GetSize(insn)};
|
||||
switch (bit_size) {
|
||||
case 8: {
|
||||
const IR::U32 bit{ByteOffset(ir, offset)};
|
||||
X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
|
||||
break;
|
||||
}
|
||||
case 16: {
|
||||
const IR::U32 bit{ShortOffset(ir, offset)};
|
||||
X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
|
||||
break;
|
||||
}
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
if (!IR::IsAligned(dest, bit_size / 32)) {
|
||||
throw NotImplementedException("Unaligned destination register {}", dest);
|
||||
}
|
||||
X(dest, ir.LoadLocal(word_offset));
|
||||
for (int i = 1; i < bit_size / 32; ++i) {
|
||||
X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslatorVisitor::LDS(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::Reg dest{Reg(insn)};
|
||||
const auto [bit_size, is_signed]{GetSize(insn)};
|
||||
const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
case 16:
|
||||
case 32:
|
||||
X(dest, IR::U32{value});
|
||||
break;
|
||||
case 64:
|
||||
case 128:
|
||||
if (!IR::IsAligned(dest, bit_size / 32)) {
|
||||
throw NotImplementedException("Unaligned destination register {}", dest);
|
||||
}
|
||||
for (int element = 0; element < bit_size / 32; ++element) {
|
||||
X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STL(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
|
||||
|
||||
const IR::Reg reg{Reg(insn)};
|
||||
const IR::U32 src{X(reg)};
|
||||
const int bit_size{GetSize(insn).first};
|
||||
switch (bit_size) {
|
||||
case 8: {
|
||||
const IR::U32 bit{ByteOffset(ir, offset)};
|
||||
const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
|
||||
ir.WriteLocal(word_offset, value);
|
||||
break;
|
||||
}
|
||||
case 16: {
|
||||
const IR::U32 bit{ShortOffset(ir, offset)};
|
||||
const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
|
||||
ir.WriteLocal(word_offset, value);
|
||||
break;
|
||||
}
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
if (!IR::IsAligned(reg, bit_size / 32)) {
|
||||
throw NotImplementedException("Unaligned source register");
|
||||
}
|
||||
ir.WriteLocal(word_offset, src);
|
||||
for (int i = 1; i < bit_size / 32; ++i) {
|
||||
ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STS(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::Reg reg{Reg(insn)};
|
||||
const int bit_size{GetSize(insn).first};
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
case 16:
|
||||
case 32:
|
||||
ir.WriteShared(bit_size, offset, X(reg));
|
||||
break;
|
||||
case 64:
|
||||
if (!IR::IsAligned(reg, 2)) {
|
||||
throw NotImplementedException("Unaligned source register {}", reg);
|
||||
}
|
||||
ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
|
||||
break;
|
||||
case 128: {
|
||||
if (!IR::IsAligned(reg, 2)) {
|
||||
throw NotImplementedException("Unaligned source register {}", reg);
|
||||
}
|
||||
const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
|
||||
ir.WriteShared(128, offset, vector);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) {
|
|||
ThrowNotImplemented(Opcode::LD);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::LDL(u64) {
|
||||
ThrowNotImplemented(Opcode::LDL);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::LDS(u64) {
|
||||
ThrowNotImplemented(Opcode::LDS);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::LEPC(u64) {
|
||||
ThrowNotImplemented(Opcode::LEPC);
|
||||
}
|
||||
|
@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) {
|
|||
ThrowNotImplemented(Opcode::ST);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STL(u64) {
|
||||
ThrowNotImplemented(Opcode::STL);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STP(u64) {
|
||||
ThrowNotImplemented(Opcode::STP);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STS(u64) {
|
||||
ThrowNotImplemented(Opcode::STS);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::SUATOM_cas(u64) {
|
||||
ThrowNotImplemented(Opcode::SUATOM_cas);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue