mirror of https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-24 12:25:00 +00:00
shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)
* shader_recompiler: Add LDEXP
* shader_recompiler: Add most image integer atomic ops
* shader_recompiler: Implement shared memory load/store
* shader_recompiler: More image atomics
* externals: Update sirit
* clang format
* cmake: Add missing files
* shader_recompiler: Fix some atomic bugs
* shader_recompiler: Vs outputs
* shader_recompiler: Shared mem has side-effects, fix format component order
* shader_recompiler: Inline constant buffer impl
* video_core: Fix regressions
* Work
* Fixup a few things
This commit is contained in:
parent af3bbc33e9
commit 6ceab6dfac

69 changed files with 1597 additions and 310 deletions
@@ -4,8 +4,8 @@
 #pragma once
 
 #include <fmt/format.h>
+#include "common/assert.h"
 #include "common/types.h"
-#include "shader_recompiler/exception.h"
 
 namespace Shader::IR {
 
@@ -88,10 +88,10 @@ constexpr size_t NumParams = 32;
 [[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
     const int result{static_cast<int>(attr) + num};
     if (result > static_cast<int>(Attribute::Param31)) {
-        throw LogicError("Overflow on register arithmetic");
+        UNREACHABLE_MSG("Overflow on register arithmetic");
     }
     if (result < static_cast<int>(Attribute::RenderTarget0)) {
-        throw LogicError("Underflow on register arithmetic");
+        UNREACHABLE_MSG("Underflow on register arithmetic");
     }
     return static_cast<Attribute>(result);
 }
@@ -39,10 +39,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
 
 void Block::AddBranch(Block* block) {
     if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
-        throw LogicError("Successor already inserted");
+        UNREACHABLE_MSG("Successor already inserted");
     }
     if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
-        throw LogicError("Predecessor already inserted");
+        UNREACHABLE_MSG("Predecessor already inserted");
     }
     imm_successors.push_back(block);
     block->imm_predecessors.push_back(this);
@@ -115,6 +115,18 @@ void IREmitter::Discard() {
     Inst(Opcode::Discard);
 }
 
+void IREmitter::Barrier() {
+    Inst(Opcode::Barrier);
+}
+
+void IREmitter::WorkgroupMemoryBarrier() {
+    Inst(Opcode::WorkgroupMemoryBarrier);
+}
+
+void IREmitter::DeviceMemoryBarrier() {
+    Inst(Opcode::DeviceMemoryBarrier);
+}
+
 U32 IREmitter::GetUserData(IR::ScalarReg reg) {
     return Inst<U32>(Opcode::GetUserData, reg);
 }
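A note on the three opcodes above: Barrier is an execution barrier (every invocation of the workgroup rendezvouses), while the two memory barriers only order memory accesses at workgroup or device scope. A minimal sketch of how a GCN translator might drive them; the handler name follows the usual one-method-per-instruction layout and is an assumption, not this commit's code:

// Hypothetical lowering; ir is the function's IR::IREmitter.
void Translator::S_BARRIER() {
    // A conservative lowering pairs the execution barrier with a workgroup
    // memory barrier so prior LDS writes are visible after the rendezvous.
    ir.WorkgroupMemoryBarrier();
    ir.Barrier();
}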
@@ -200,6 +212,10 @@ U1 IREmitter::GetVcc() {
     return Inst<U1>(Opcode::GetVcc);
 }
 
+U32 IREmitter::GetSccLo() {
+    return Inst<U32>(Opcode::GetSccLo);
+}
+
 U32 IREmitter::GetVccLo() {
     return Inst<U32>(Opcode::GetVccLo);
 }
@@ -220,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
     Inst(Opcode::SetVcc, value);
 }
 
+void IREmitter::SetSccLo(const U32& value) {
+    Inst(Opcode::SetSccLo, value);
+}
+
 void IREmitter::SetVccLo(const U32& value) {
     Inst(Opcode::SetVccLo, value);
 }
@@ -240,22 +260,25 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
     Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
 }
 
-U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) {
-    /*switch (bit_size) {
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
+    switch (bit_size) {
     case 8:
-        return Inst<U32>(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset);
+        return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
     case 16:
-        return Inst<U32>(is_signed ? Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset);
+        return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
     case 32:
-        return Inst<U32>(Opcode::ReadSharedU32, offset);
+        return Inst<U32>(Opcode::LoadSharedU32, offset);
     case 64:
-        return Inst<U64>(Opcode::ReadSharedU64, offset);
+        return Inst<U64>(Opcode::LoadSharedU64, offset);
+    case 128:
+        return Inst(Opcode::LoadSharedU128, offset);
     default:
         UNREACHABLE_MSG("Invalid bit size {}", bit_size);
     }
-    UNREACHABLE_MSG("Invalid bit size {}", bit_size);*/
 }
 
 void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
-    /*switch (bit_size) {
+    switch (bit_size) {
     case 8:
         Inst(Opcode::WriteSharedU8, offset, value);
         break;
@@ -268,9 +291,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
     case 64:
         Inst(Opcode::WriteSharedU64, offset, value);
         break;
+    case 128:
+        Inst(Opcode::WriteSharedU128, offset, value);
+        break;
     default:
         UNREACHABLE_MSG("Invalid bit size {}", bit_size);
-    }*/
+    }
 }
 
 U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
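For context, the DS (LDS) instructions are the intended users of LoadShared/WriteShared. A sketch of a 32-bit load/store pair; the translator hooks, register accessors, and GcnInst fields named here are assumptions, and instruction offsets are omitted:

// Hypothetical lowering of ds_read_b32 / ds_write_b32.
void Translator::DS_READ_B32(const GcnInst& inst) {
    const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
    const IR::Value data{ir.LoadShared(32, false, addr)};
    ir.SetVectorReg(IR::VectorReg(inst.dst[0].code), IR::U32{data});
}

void Translator::DS_WRITE_B32(const GcnInst& inst) {
    const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
    const IR::U32 data{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
    ir.WriteShared(32, data, addr);
}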
@@ -603,6 +629,10 @@ F32 IREmitter::FPExp2(const F32& value) {
     return Inst<F32>(Opcode::FPExp2, value);
 }
 
+F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
+    return Inst<F32>(Opcode::FPLdexp, value, exp);
+}
+
 F32 IREmitter::FPLog2(const F32& value) {
     return Inst<F32>(Opcode::FPLog2, value);
 }
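FPLdexp carries V_LDEXP_F32 semantics: scale a float by a power of two without rounding the mantissa. A one-line reference model in plain C++ (not emulator code):

#include <cmath>

// FPLdexp(value, exp) computes value * 2^exp, like std::ldexp.
float LdexpRef(float value, int exp) {
    return std::ldexp(value, exp);
}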
@@ -810,6 +840,17 @@ U1 IREmitter::FPIsNan(const F32F64& value) {
     }
 }
 
+U1 IREmitter::FPIsInf(const F32F64& value) {
+    switch (value.Type()) {
+    case Type::F32:
+        return Inst<U1>(Opcode::FPIsInf32, value);
+    case Type::F64:
+        return Inst<U1>(Opcode::FPIsInf64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
 U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
     if (lhs.Type() != rhs.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
@@ -866,6 +907,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
     }
 }
 
+Value IREmitter::IAddCary(const U32& a, const U32& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::IAddCary32, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
     if (a.Type() != b.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
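Per the opcode table below, IAddCary32 (the identifier really is spelled that way in this commit) yields a U32x2: the 32-bit sum plus its carry-out, matching how V_ADD_I32 deposits the carry in VCC. A standalone sketch of the semantics, not the emulator's code:

#include <cstdint>
#include <utility>

// Returns (sum, carry); carry is 1 exactly when the unsigned add wraps.
std::pair<uint32_t, uint32_t> AddWithCarry(uint32_t a, uint32_t b) {
    const uint32_t sum = a + b; // well-defined modulo 2^32
    return {sum, sum < a ? 1u : 0u};
}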
@@ -1142,6 +1195,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
 }
 
 U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
+    switch (result_bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<U16>(Opcode::ConvertU16U32, value);
+        }
+    }
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
@@ -1163,6 +1223,73 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
+Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicIAdd32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicSMin32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicUMin32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
+                                 bool is_signed, TextureInstInfo info) {
+    return is_signed ? ImageAtomicSMin(handle, coords, value, info)
+                     : ImageAtomicUMin(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicSMax32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
+                                 bool is_signed, TextureInstInfo info) {
+    return is_signed ? ImageAtomicSMax(handle, coords, value, info)
+                     : ImageAtomicUMax(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicInc32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicDec32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicAnd32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                               TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicOr32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicXor32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
+                                     TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
+}
+
 Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
                                         const Value& offset, const F32& lod_clamp,
                                         TextureInstInfo info) {
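All thirteen helpers funnel into single 32-bit image-atomic opcodes, so a MIMG atomic translates in one call. A hypothetical front-end dispatch; the handle/coordinate helpers are assumptions, not this commit's translator code:

// Sketch: lowering IMAGE_ATOMIC_ADD. GetTsharp/GetImageCoords stand in for
// whatever the real translator uses to fetch the image handle and coords.
void Translator::IMAGE_ATOMIC_ADD(const GcnInst& inst) {
    const IR::Value handle = GetTsharp(inst);      // assumed helper
    const IR::Value coords = GetImageCoords(inst); // assumed helper
    const IR::Value value = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
    // Image atomics return the pre-op texel, which GCN writes back when GLC=1.
    const IR::Value prev = ir.ImageAtomicIAdd(handle, coords, value, {});
    ir.SetVectorReg(IR::VectorReg(inst.src[0].code), IR::U32{prev});
}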
@@ -43,6 +43,10 @@ public:
     void Epilogue();
     void Discard();
 
+    void Barrier();
+    void WorkgroupMemoryBarrier();
+    void DeviceMemoryBarrier();
+
     [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
     [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
     void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);
@@ -60,11 +64,13 @@ public:
     [[nodiscard]] U1 GetScc();
     [[nodiscard]] U1 GetExec();
     [[nodiscard]] U1 GetVcc();
+    [[nodiscard]] U32 GetSccLo();
     [[nodiscard]] U32 GetVccLo();
     [[nodiscard]] U32 GetVccHi();
     void SetScc(const U1& value);
     void SetExec(const U1& value);
     void SetVcc(const U1& value);
+    void SetSccLo(const U32& value);
     void SetVccLo(const U32& value);
     void SetVccHi(const U32& value);
 
@@ -74,7 +80,7 @@ public:
     [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
     void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
 
-    [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
+    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
     void WriteShared(int bit_size, const Value& value, const U32& offset);
 
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
@@ -120,6 +126,7 @@ public:
     [[nodiscard]] F32 FPSin(const F32& value);
     [[nodiscard]] F32 FPExp2(const F32& value);
     [[nodiscard]] F32 FPLog2(const F32& value);
+    [[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
     [[nodiscard]] F32F64 FPRecip(const F32F64& value);
     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
     [[nodiscard]] F32 FPSqrt(const F32& value);
@@ -139,14 +146,16 @@ public:
     [[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPIsNan(const F32F64& value);
+    [[nodiscard]] U1 FPIsInf(const F32F64& value);
     [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
 
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] Value IAddCary(const U32& a, const U32& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
-    [[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
+    [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
@@ -199,6 +208,33 @@ public:
     [[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
 
+    [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
+                                            const Value& value, TextureInstInfo info);
+
     [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
                                                const F32& bias, const Value& offset,
                                                const F32& lod_clamp, TextureInstInfo info);
@@ -40,6 +40,9 @@ Inst::~Inst() {
 
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
+    case Opcode::Barrier:
+    case Opcode::WorkgroupMemoryBarrier:
+    case Opcode::DeviceMemoryBarrier:
     case Opcode::ConditionRef:
     case Opcode::Reference:
     case Opcode::PhiMove:
@@ -52,7 +55,23 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::StoreBufferF32x3:
     case Opcode::StoreBufferF32x4:
     case Opcode::StoreBufferU32:
+    case Opcode::WriteSharedU128:
+    case Opcode::WriteSharedU64:
+    case Opcode::WriteSharedU32:
+    case Opcode::WriteSharedU16:
+    case Opcode::WriteSharedU8:
     case Opcode::ImageWrite:
+    case Opcode::ImageAtomicIAdd32:
+    case Opcode::ImageAtomicSMin32:
+    case Opcode::ImageAtomicUMin32:
+    case Opcode::ImageAtomicSMax32:
+    case Opcode::ImageAtomicUMax32:
+    case Opcode::ImageAtomicInc32:
+    case Opcode::ImageAtomicDec32:
+    case Opcode::ImageAtomicAnd32:
+    case Opcode::ImageAtomicOr32:
+    case Opcode::ImageAtomicXor32:
+    case Opcode::ImageAtomicExchange32:
         return true;
     default:
         return false;
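Listing the shared-memory writes and image atomics here is what the commit message means by "shared mem has side-effects": dead-code elimination may only drop an unused instruction when MayHaveSideEffects() is false. A sketch of that contract; the pass shape and method names follow the usual design of this IR and are assumed, not quoted:

// Typical DCE over this IR: keep an instruction if its result is used or
// if it can be observed through memory.
void DeadCodeElimination(IR::Block& block) {
    for (IR::Inst& inst : block) {
        if (!inst.HasUses() && !inst.MayHaveSideEffects()) {
            inst.Invalidate();
        }
    }
}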
@@ -61,7 +80,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
 
 bool Inst::AreAllArgsImmediates() const {
     if (op == Opcode::Phi) {
-        throw LogicError("Testing for all arguments are immediates on phi instruction");
+        UNREACHABLE_MSG("Testing for all arguments are immediates on phi instruction");
     }
     return std::all_of(args.begin(), args.begin() + NumArgs(),
                        [](const IR::Value& value) { return value.IsImmediate(); });
@@ -91,7 +110,7 @@ void Inst::SetArg(size_t index, Value value) {
 
 Block* Inst::PhiBlock(size_t index) const {
     if (op != Opcode::Phi) {
-        throw LogicError("{} is not a Phi instruction", op);
+        UNREACHABLE_MSG("{} is not a Phi instruction", op);
     }
     if (index >= phi_args.size()) {
         throw InvalidArgument("Out of bounds argument index {} in phi instruction");
@@ -143,7 +162,7 @@ void Inst::ReplaceUsesWith(Value replacement) {
 
 void Inst::ReplaceOpcode(IR::Opcode opcode) {
     if (opcode == IR::Opcode::Phi) {
-        throw LogicError("Cannot transition into Phi");
+        UNREACHABLE_MSG("Cannot transition into Phi");
     }
     if (op == Opcode::Phi) {
         // Transition out of phi arguments into non-phi
@@ -19,6 +19,25 @@ OPCODE(ReadConst, U32, U32x
 OPCODE(ReadConstBuffer, F32, Opaque, U32, )
 OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
 
+// Barriers
+OPCODE(Barrier, Void, )
+OPCODE(WorkgroupMemoryBarrier, Void, )
+OPCODE(DeviceMemoryBarrier, Void, )
+
+// Shared memory operations
+OPCODE(LoadSharedU8, U32, U32, )
+OPCODE(LoadSharedS8, U32, U32, )
+OPCODE(LoadSharedU16, U32, U32, )
+OPCODE(LoadSharedS16, U32, U32, )
+OPCODE(LoadSharedU32, U32, U32, )
+OPCODE(LoadSharedU64, U32x2, U32, )
+OPCODE(LoadSharedU128, U32x4, U32, )
+OPCODE(WriteSharedU8, Void, U32, U32, )
+OPCODE(WriteSharedU16, Void, U32, U32, )
+OPCODE(WriteSharedU32, Void, U32, U32, )
+OPCODE(WriteSharedU64, Void, U32, U32x2, )
+OPCODE(WriteSharedU128, Void, U32, U32x4, )
+
 // Context getters/setters
 OPCODE(GetUserData, U32, ScalarReg, )
 OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
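Each table row reads OPCODE(name, result type, argument types...); note the table types the wide shared accesses as vectors of 32-bit words (U32x2/U32x4). The .inc layout strongly suggests the usual X-macro consumption, sketched here as an assumption rather than quoted from the tree:

// Plausible expansion site: include the table once per definition of OPCODE.
#define OPCODE(name, result, ...) name,
enum class Opcode : u32 {
#include "opcodes.inc"
};
#undef OPCODE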
@@ -37,11 +56,13 @@ OPCODE(SetAttribute, Void, Attr
 OPCODE(GetScc, U1, Void, )
 OPCODE(GetExec, U1, Void, )
 OPCODE(GetVcc, U1, Void, )
+OPCODE(GetSccLo, U32, Void, )
 OPCODE(GetVccLo, U32, Void, )
 OPCODE(GetVccHi, U32, Void, )
 OPCODE(SetScc, Void, U1, )
 OPCODE(SetExec, Void, U1, )
 OPCODE(SetVcc, Void, U1, )
+OPCODE(SetSccLo, Void, U32, )
 OPCODE(SetVccLo, Void, U32, )
 OPCODE(SetVccHi, Void, U32, )
 
@@ -148,6 +169,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
 OPCODE(FPSqrt, F32, F32, )
 OPCODE(FPSin, F32, F32, )
 OPCODE(FPExp2, F32, F32, )
+OPCODE(FPLdexp, F32, F32, U32, )
 OPCODE(FPCos, F32, F32, )
 OPCODE(FPLog2, F32, F32, )
 OPCODE(FPSaturate32, F32, F32, )
@@ -190,10 +212,13 @@ OPCODE(FPUnordGreaterThanEqual32, U1, F32,
 OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
 OPCODE(FPIsNan32, U1, F32, )
 OPCODE(FPIsNan64, U1, F64, )
+OPCODE(FPIsInf32, U1, F32, )
+OPCODE(FPIsInf64, U1, F64, )
 
 // Integer operations
 OPCODE(IAdd32, U32, U32, U32, )
 OPCODE(IAdd64, U64, U64, U64, )
+OPCODE(IAddCary32, U32x2, U32, U32, )
 OPCODE(ISub32, U32, U32, U32, )
 OPCODE(ISub64, U64, U64, U64, )
 OPCODE(IMul32, U32, U32, U32, )
@@ -258,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32,
 OPCODE(ConvertF64S32, F64, U32, )
 OPCODE(ConvertF64U32, F64, U32, )
 OPCODE(ConvertF32U16, F32, U16, )
+OPCODE(ConvertU16U32, U16, U32, )
 
 // Image operations
 OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
@@ -273,6 +299,19 @@ OPCODE(ImageGradient, F32x4, Opaq
 OPCODE(ImageRead, U32x4, Opaque, Opaque, )
 OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
 
+// Image atomic operations
+OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
+
 // Warp operations
 OPCODE(LaneId, U32, )
 OPCODE(QuadShuffle, U32, U32, U32 )
@@ -324,8 +324,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::BitFieldUExtract:
         FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
             if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
-                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
-                                 base, shift, count);
+                UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+                                base, shift, count);
             }
             return (base >> shift) & ((1U << count) - 1);
         });
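The folded expression is the standard unsigned bitfield extract; restated outside the pass with a worked value:

#include <cstdint>

// Take `count` bits of `base` starting at bit `shift` (count < 32 here).
uint32_t BitFieldUExtract(uint32_t base, uint32_t shift, uint32_t count) {
    return (base >> shift) & ((1u << count) - 1);
}
// Example: BitFieldUExtract(0xDEADBEEF, 8, 8) == 0xBE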
@@ -336,8 +336,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
             const size_t left_shift{32 - back_shift};
             const size_t right_shift{static_cast<size_t>(32 - count)};
             if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
-                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
-                                 base, shift, count);
+                UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+                                base, shift, count);
             }
             return static_cast<u32>((base << left_shift) >> right_shift);
         });
@@ -345,8 +345,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::BitFieldInsert:
         FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
             if (bits >= 32 || offset >= 32) {
-                throw LogicError("Undefined result in {}({}, {}, {}, {})",
-                                 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
+                UNREACHABLE_MSG("Undefined result in {}({}, {}, {}, {})",
+                                IR::Opcode::BitFieldInsert, base, insert, offset, bits);
             }
             return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
         });
@@ -89,6 +89,17 @@ bool IsImageInstruction(const IR::Inst& inst) {
     case IR::Opcode::ImageGradient:
     case IR::Opcode::ImageRead:
     case IR::Opcode::ImageWrite:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
         return true;
     default:
         return false;
@@ -99,6 +110,17 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::ImageWrite:
    case IR::Opcode::ImageRead:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
         return true;
     default:
         return false;
@@ -115,7 +137,8 @@ public:
     u32 Add(const BufferResource& desc) {
         const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
             return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset;
+                   desc.dword_offset == existing.dword_offset &&
+                   desc.inline_cbuf == existing.inline_cbuf;
         })};
         auto& buffer = buffer_resources[index];
         ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
@@ -196,20 +219,70 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
     };
 }
 
+static constexpr size_t MaxUboSize = 65536;
+
+s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
+                        AmdGpu::Buffer& cbuf) {
+    // Assuming V# is in UD s[32:35]
+    // The next pattern:
+    //   s_getpc_b64     s[32:33]
+    //   s_add_u32       s32, <const>, s32
+    //   s_addc_u32      s33, 0, s33
+    //   s_mov_b32       s35, <const>
+    //   s_movk_i32      s34, <const>
+    //   buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
+    // is used to define an inline constant buffer
+
+    IR::Inst* handle = inst.Arg(0).InstRecursive();
+    IR::Inst* p0 = handle->Arg(0).InstRecursive();
+    if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
+        !p0->Arg(1).IsImmediate()) {
+        return -1;
+    }
+    IR::Inst* p1 = handle->Arg(1).InstRecursive();
+    if (p1->GetOpcode() != IR::Opcode::IAdd32) {
+        return -1;
+    }
+    if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
+        return -1;
+    }
+    // We have found this pattern. Build the sharp.
+    std::array<u64, 2> buffer;
+    buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
+    buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
+    cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
+    // Assign a binding to this sharp.
+    return descriptors.Add(BufferResource{
+        .sgpr_base = std::numeric_limits<u32>::max(),
+        .dword_offset = 0,
+        .stride = cbuf.GetStride(),
+        .num_records = u32(cbuf.num_records),
+        .used_types = BufferDataType(inst),
+        .inline_cbuf = cbuf,
+        .is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
+    });
+}
+
 void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                             Descriptors& descriptors) {
-    static constexpr size_t MaxUboSize = 65536;
-    IR::Inst* producer = inst.Arg(0).InstRecursive();
-    const auto sharp = TrackSharp(producer);
-    const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
-    const u32 binding = descriptors.Add(BufferResource{
-        .sgpr_base = sharp.sgpr_base,
-        .dword_offset = sharp.dword_offset,
-        .stride = buffer.GetStride(),
-        .num_records = u32(buffer.num_records),
-        .used_types = BufferDataType(inst),
-        .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
-    });
+    s32 binding{};
+    AmdGpu::Buffer buffer;
+    if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
+        IR::Inst* handle = inst.Arg(0).InstRecursive();
+        IR::Inst* producer = handle->Arg(0).InstRecursive();
+        const auto sharp = TrackSharp(producer);
+        buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
+        binding = descriptors.Add(BufferResource{
+            .sgpr_base = sharp.sgpr_base,
+            .dword_offset = sharp.dword_offset,
+            .stride = buffer.GetStride(),
+            .num_records = u32(buffer.num_records),
+            .used_types = BufferDataType(inst),
+            .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
+        });
+    }
 
     const auto inst_info = inst.Flags<IR::BufferInstInfo>();
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     // Replace handle with binding index in buffer resource list.
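The matched s_getpc_b64/s_add_u32 pair means the V# lives at a PC-relative literal, so the pass can rebuild the full 128-bit descriptor at compile time: a 64-bit base from pgm_base plus the two add literals, and the stride/num_records/format dwords taken verbatim. A reduced model of that assembly (types simplified; layout as in the code above):

#include <array>
#include <bit>
#include <cstdint>

struct Sharp128 {
    uint64_t lo, hi; // stand-in for AmdGpu::Buffer's 128 bits
};

Sharp128 AssembleInlineCbuf(uint64_t pgm_base, uint32_t lit0, uint32_t lit1,
                            uint32_t word2, uint32_t word3) {
    std::array<uint64_t, 2> raw;
    raw[0] = pgm_base + lit0 + lit1;          // base address of the inline cbuf
    raw[1] = word2 | (uint64_t(word3) << 32); // stride/num_records/format fields
    return std::bit_cast<Sharp128>(raw);
}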
@@ -217,7 +290,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
     if (inst_info.is_typed) {
         ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
-               inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
+               (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32));
     }
     if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
         inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
@@ -16,6 +16,16 @@ void Visit(Info& info, IR::Inst& inst) {
         info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
         break;
     }
+    case IR::Opcode::LoadSharedS8:
+    case IR::Opcode::LoadSharedU8:
+    case IR::Opcode::WriteSharedU8:
+        info.uses_shared_u8 = true;
+        break;
+    case IR::Opcode::LoadSharedS16:
+    case IR::Opcode::LoadSharedU16:
+    case IR::Opcode::WriteSharedU16:
+        info.uses_shared_u16 = true;
+        break;
     case IR::Opcode::QuadShuffle:
         info.uses_group_quad = true;
         break;
@@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
 struct ExecFlagTag : FlagTag {};
 struct VccFlagTag : FlagTag {};
 struct VccLoTag : FlagTag {};
+struct SccLoTag : FlagTag {};
 struct VccHiTag : FlagTag {};
 
 struct GotoVariable : FlagTag {
@@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
 };
 
 using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
-                             VccFlagTag, VccLoTag, VccHiTag>;
+                             VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
 using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
 
 struct DefTable {
@@ -83,6 +84,13 @@ struct DefTable {
         exec_flag.insert_or_assign(block, value);
     }
 
+    const IR::Value& Def(IR::Block* block, SccLoTag) {
+        return scc_lo_flag[block];
+    }
+    void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
+        scc_lo_flag.insert_or_assign(block, value);
+    }
+
     const IR::Value& Def(IR::Block* block, VccLoTag) {
         return vcc_lo_flag[block];
     }
@@ -108,6 +116,7 @@ struct DefTable {
     ValueMap scc_flag;
     ValueMap exec_flag;
     ValueMap vcc_flag;
+    ValueMap scc_lo_flag;
     ValueMap vcc_lo_flag;
     ValueMap vcc_hi_flag;
 };
@@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
     return IR::Opcode::UndefU32;
 }
 
+IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
 IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
     return IR::Opcode::UndefU32;
 }
@@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::SetVcc:
         pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
         break;
+    case IR::Opcode::SetSccLo:
+        pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
+        break;
     case IR::Opcode::SetVccLo:
         pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
         break;
@@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::GetVcc:
         inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
         break;
+    case IR::Opcode::GetSccLo:
+        inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
+        break;
     case IR::Opcode::GetVccLo:
         inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
         break;
@@ -14,7 +14,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) {
     BlockList post_order_blocks;
 
     if (root.type != AbstractSyntaxNode::Type::Block) {
-        throw LogicError("First node in abstract syntax list root is not a block");
+        UNREACHABLE_MSG("First node in abstract syntax list root is not a block");
     }
     Block* const first_block{root.data.block};
     visited.insert(first_block);
@@ -3,9 +3,9 @@
 
 #pragma once
 
+#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/types.h"
-#include "shader_recompiler/exception.h"
 #include "video_core/amdgpu/pixel_format.h"
 
 namespace Shader::IR {
@@ -428,10 +428,10 @@ template <RegT Reg>
 [[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
     const int result{static_cast<int>(reg) + num};
     if (result >= static_cast<int>(Reg::Max)) {
-        throw LogicError("Overflow on register arithmetic");
+        UNREACHABLE_MSG("Overflow on register arithmetic");
     }
     if (result < 0) {
-        throw LogicError("Underflow on register arithmetic");
+        UNREACHABLE_MSG("Underflow on register arithmetic");
     }
     return static_cast<Reg>(result);
 }
@@ -83,7 +83,7 @@ bool Value::operator==(const Value& other) const {
     case Type::F64x4:
         break;
     }
-    throw LogicError("Invalid type {}", type);
+    UNREACHABLE_MSG("Invalid type {}", type);
 }
 
 bool Value::operator!=(const Value& other) const {