shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)

* shader_recompiler: Add LDEXP

* shader_recompiler: Add most image integer atomic ops

* shader_recompiler: Implement shared memory load/store

* shader_recompiler: More image atomics

* externals: Update sirit

* clang format

* cmake: Add missing files

* shader_recompiler: Fix some atomic bugs

* shader_recompiler: Vs outputs

* shader_recompiler: Shared mem has side-effects, fix format component order

* shader_recompiler: Inline constant buffer impl

* video_core: Fix regressions

* Work

* Fixup a few things
TheTurtle 2024-07-05 00:15:44 +03:00 committed by GitHub
parent af3bbc33e9
commit 6ceab6dfac
69 changed files with 1597 additions and 310 deletions
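
For orientation before the diff, a minimal usage sketch of the new IREmitter surface (not part of the commit; `block`, the literal offset, and the single-argument IREmitter constructor are assumptions):

    // LoadShared/WriteShared dispatch on bit size (8/16/32/64/128);
    // per the opcode table, the 64/128-bit forms move U32x2/U32x4 values.
    IR::IREmitter ir{block};
    const IR::U32 addr = ir.Imm32(16u);                     // byte offset into LDS
    const IR::Value data = ir.LoadShared(64, false, addr);  // 64-bit load
    ir.WriteShared(64, data, addr);
    ir.Barrier();                                           // workgroup execution barrier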


@@ -4,8 +4,8 @@
 #pragma once

 #include <fmt/format.h>
+#include "common/assert.h"
 #include "common/types.h"
-#include "shader_recompiler/exception.h"

 namespace Shader::IR {
@@ -88,10 +88,10 @@ constexpr size_t NumParams = 32;
 [[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
     const int result{static_cast<int>(attr) + num};
     if (result > static_cast<int>(Attribute::Param31)) {
-        throw LogicError("Overflow on register arithmetic");
+        UNREACHABLE_MSG("Overflow on register arithmetic");
     }
     if (result < static_cast<int>(Attribute::RenderTarget0)) {
-        throw LogicError("Underflow on register arithmetic");
+        UNREACHABLE_MSG("Underflow on register arithmetic");
     }
     return static_cast<Attribute>(result);
 }


@@ -39,10 +39,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
 void Block::AddBranch(Block* block) {
     if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
-        throw LogicError("Successor already inserted");
+        UNREACHABLE_MSG("Successor already inserted");
     }
     if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
-        throw LogicError("Predecessor already inserted");
+        UNREACHABLE_MSG("Predecessor already inserted");
     }
     imm_successors.push_back(block);
     block->imm_predecessors.push_back(this);


@@ -115,6 +115,18 @@ void IREmitter::Discard() {
     Inst(Opcode::Discard);
 }

+void IREmitter::Barrier() {
+    Inst(Opcode::Barrier);
+}
+
+void IREmitter::WorkgroupMemoryBarrier() {
+    Inst(Opcode::WorkgroupMemoryBarrier);
+}
+
+void IREmitter::DeviceMemoryBarrier() {
+    Inst(Opcode::DeviceMemoryBarrier);
+}
+
 U32 IREmitter::GetUserData(IR::ScalarReg reg) {
     return Inst<U32>(Opcode::GetUserData, reg);
 }
@@ -200,6 +212,10 @@ U1 IREmitter::GetVcc() {
     return Inst<U1>(Opcode::GetVcc);
 }

+U32 IREmitter::GetSccLo() {
+    return Inst<U32>(Opcode::GetSccLo);
+}
+
 U32 IREmitter::GetVccLo() {
     return Inst<U32>(Opcode::GetVccLo);
 }
@@ -220,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
     Inst(Opcode::SetVcc, value);
 }

+void IREmitter::SetSccLo(const U32& value) {
+    Inst(Opcode::SetSccLo, value);
+}
+
 void IREmitter::SetVccLo(const U32& value) {
     Inst(Opcode::SetVccLo, value);
 }
@@ -240,22 +260,25 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
     Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
 }

-U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) {
-    /*switch (bit_size) {
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
+    switch (bit_size) {
     case 8:
-        return Inst<U32>(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset);
+        return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
     case 16:
-        return Inst<U32>(is_signed ? Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset);
+        return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
     case 32:
-        return Inst<U32>(Opcode::ReadSharedU32, offset);
+        return Inst<U32>(Opcode::LoadSharedU32, offset);
     case 64:
-        return Inst<U64>(Opcode::ReadSharedU64, offset);
+        return Inst<U64>(Opcode::LoadSharedU64, offset);
+    case 128:
+        return Inst(Opcode::LoadSharedU128, offset);
+    default:
+        UNREACHABLE_MSG("Invalid bit size {}", bit_size);
     }
-    UNREACHABLE_MSG("Invalid bit size {}", bit_size);*/
 }

 void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
-    /*switch (bit_size) {
+    switch (bit_size) {
     case 8:
         Inst(Opcode::WriteSharedU8, offset, value);
         break;
@@ -268,9 +291,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
     case 64:
         Inst(Opcode::WriteSharedU64, offset, value);
         break;
+    case 128:
+        Inst(Opcode::WriteSharedU128, offset, value);
+        break;
     default:
         UNREACHABLE_MSG("Invalid bit size {}", bit_size);
-    }*/
+    }
 }

 U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
@@ -603,6 +629,10 @@ F32 IREmitter::FPExp2(const F32& value) {
     return Inst<F32>(Opcode::FPExp2, value);
 }

+F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
+    return Inst<F32>(Opcode::FPLdexp, value, exp);
+}
+
 F32 IREmitter::FPLog2(const F32& value) {
     return Inst<F32>(Opcode::FPLog2, value);
 }
@@ -810,6 +840,17 @@ U1 IREmitter::FPIsNan(const F32F64& value) {
     }
 }

+U1 IREmitter::FPIsInf(const F32F64& value) {
+    switch (value.Type()) {
+    case Type::F32:
+        return Inst<U1>(Opcode::FPIsInf32, value);
+    case Type::F64:
+        return Inst<U1>(Opcode::FPIsInf64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
 U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
     if (lhs.Type() != rhs.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
@@ -866,6 +907,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
     }
 }

+Value IREmitter::IAddCary(const U32& a, const U32& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::IAddCary32, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
     if (a.Type() != b.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
@@ -1142,6 +1195,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
 }

 U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
+    switch (result_bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<U16>(Opcode::ConvertU16U32, value);
+        }
+    }
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
@@ -1163,6 +1223,73 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }

+Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicIAdd32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicSMin32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicUMin32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
+                                 bool is_signed, TextureInstInfo info) {
+    return is_signed ? ImageAtomicSMin(handle, coords, value, info)
+                     : ImageAtomicUMin(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicSMax32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
+                                 bool is_signed, TextureInstInfo info) {
+    return is_signed ? ImageAtomicSMax(handle, coords, value, info)
+                     : ImageAtomicUMax(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicInc32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicDec32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicAnd32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                               TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicOr32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicXor32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
+                                     TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
+}
+
 Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
                                         const Value& offset, const F32& lod_clamp,
                                         TextureInstInfo info) {
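
All of these helpers emit single-return 32-bit image atomics (the opcode table declares each as yielding U32, presumably the pre-operation texel value, as is conventional for atomics); IMin/IMax are the only signed-aware entry points and simply dispatch to the S/U variants. A hedged call-site sketch (names are illustrative, not from this commit):

    // Hypothetical translator snippet for an integer image atomic add;
    // `handle` is the tracked image sharp, `coords` the packed coordinates.
    const IR::Value prev = ir.ImageAtomicIAdd(handle, coords, value, info);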


@@ -43,6 +43,10 @@ public:
     void Epilogue();
     void Discard();

+    void Barrier();
+    void WorkgroupMemoryBarrier();
+    void DeviceMemoryBarrier();
+
     [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
     [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
     void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);
@@ -60,11 +64,13 @@ public:
     [[nodiscard]] U1 GetScc();
     [[nodiscard]] U1 GetExec();
     [[nodiscard]] U1 GetVcc();
+    [[nodiscard]] U32 GetSccLo();
     [[nodiscard]] U32 GetVccLo();
     [[nodiscard]] U32 GetVccHi();
     void SetScc(const U1& value);
     void SetExec(const U1& value);
     void SetVcc(const U1& value);
+    void SetSccLo(const U32& value);
     void SetVccLo(const U32& value);
     void SetVccHi(const U32& value);
@@ -74,7 +80,7 @@ public:
     [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
     void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);

-    [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
+    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
     void WriteShared(int bit_size, const Value& value, const U32& offset);

     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
@@ -120,6 +126,7 @@ public:
     [[nodiscard]] F32 FPSin(const F32& value);
     [[nodiscard]] F32 FPExp2(const F32& value);
     [[nodiscard]] F32 FPLog2(const F32& value);
+    [[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
     [[nodiscard]] F32F64 FPRecip(const F32F64& value);
     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
     [[nodiscard]] F32 FPSqrt(const F32& value);
@@ -139,14 +146,16 @@ public:
     [[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPIsNan(const F32F64& value);
+    [[nodiscard]] U1 FPIsInf(const F32F64& value);
     [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);

     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] Value IAddCary(const U32& a, const U32& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
-    [[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
+    [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
@@ -199,6 +208,33 @@ public:
     [[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);

+    [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
+                                            const Value& value, TextureInstInfo info);
+
     [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
                                                const F32& bias, const Value& offset,
                                                const F32& lod_clamp, TextureInstInfo info);


@@ -40,6 +40,9 @@ Inst::~Inst() {
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
+    case Opcode::Barrier:
+    case Opcode::WorkgroupMemoryBarrier:
+    case Opcode::DeviceMemoryBarrier:
     case Opcode::ConditionRef:
     case Opcode::Reference:
     case Opcode::PhiMove:
@@ -52,7 +55,23 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::StoreBufferF32x3:
     case Opcode::StoreBufferF32x4:
     case Opcode::StoreBufferU32:
+    case Opcode::WriteSharedU128:
+    case Opcode::WriteSharedU64:
+    case Opcode::WriteSharedU32:
+    case Opcode::WriteSharedU16:
+    case Opcode::WriteSharedU8:
     case Opcode::ImageWrite:
+    case Opcode::ImageAtomicIAdd32:
+    case Opcode::ImageAtomicSMin32:
+    case Opcode::ImageAtomicUMin32:
+    case Opcode::ImageAtomicSMax32:
+    case Opcode::ImageAtomicUMax32:
+    case Opcode::ImageAtomicInc32:
+    case Opcode::ImageAtomicDec32:
+    case Opcode::ImageAtomicAnd32:
+    case Opcode::ImageAtomicOr32:
+    case Opcode::ImageAtomicXor32:
+    case Opcode::ImageAtomicExchange32:
         return true;
     default:
         return false;
@@ -61,7 +80,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
 bool Inst::AreAllArgsImmediates() const {
     if (op == Opcode::Phi) {
-        throw LogicError("Testing for all arguments are immediates on phi instruction");
+        UNREACHABLE_MSG("Testing for all arguments are immediates on phi instruction");
     }
     return std::all_of(args.begin(), args.begin() + NumArgs(),
                        [](const IR::Value& value) { return value.IsImmediate(); });
@@ -91,7 +110,7 @@ void Inst::SetArg(size_t index, Value value) {
 Block* Inst::PhiBlock(size_t index) const {
     if (op != Opcode::Phi) {
-        throw LogicError("{} is not a Phi instruction", op);
+        UNREACHABLE_MSG("{} is not a Phi instruction", op);
     }
     if (index >= phi_args.size()) {
         throw InvalidArgument("Out of bounds argument index {} in phi instruction");
@@ -143,7 +162,7 @@ void Inst::ReplaceUsesWith(Value replacement) {
 void Inst::ReplaceOpcode(IR::Opcode opcode) {
     if (opcode == IR::Opcode::Phi) {
-        throw LogicError("Cannot transition into Phi");
+        UNREACHABLE_MSG("Cannot transition into Phi");
     }
     if (op == Opcode::Phi) {
         // Transition out of phi arguments into non-phi


@@ -19,6 +19,25 @@ OPCODE(ReadConst, U32, U32x
 OPCODE(ReadConstBuffer, F32, Opaque, U32, )
 OPCODE(ReadConstBufferU32, U32, Opaque, U32, )

+// Barriers
+OPCODE(Barrier, Void, )
+OPCODE(WorkgroupMemoryBarrier, Void, )
+OPCODE(DeviceMemoryBarrier, Void, )
+
+// Shared memory operations
+OPCODE(LoadSharedU8, U32, U32, )
+OPCODE(LoadSharedS8, U32, U32, )
+OPCODE(LoadSharedU16, U32, U32, )
+OPCODE(LoadSharedS16, U32, U32, )
+OPCODE(LoadSharedU32, U32, U32, )
+OPCODE(LoadSharedU64, U32x2, U32, )
+OPCODE(LoadSharedU128, U32x4, U32, )
+OPCODE(WriteSharedU8, Void, U32, U32, )
+OPCODE(WriteSharedU16, Void, U32, U32, )
+OPCODE(WriteSharedU32, Void, U32, U32, )
+OPCODE(WriteSharedU64, Void, U32, U32x2, )
+OPCODE(WriteSharedU128, Void, U32, U32x4, )
+
 // Context getters/setters
 OPCODE(GetUserData, U32, ScalarReg, )
 OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
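
Worth noting in the table above: the 8/16-bit shared loads still produce a U32 (the S8/S16 forms imply sign-extension), while the 64- and 128-bit forms move U32x2/U32x4 vectors. That width mapping is what lets the LoadShared/WriteShared helpers cover DS accesses up to 128 bits.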
@@ -37,11 +56,13 @@ OPCODE(SetAttribute, Void, Attr
 OPCODE(GetScc, U1, Void, )
 OPCODE(GetExec, U1, Void, )
 OPCODE(GetVcc, U1, Void, )
+OPCODE(GetSccLo, U32, Void, )
 OPCODE(GetVccLo, U32, Void, )
 OPCODE(GetVccHi, U32, Void, )
 OPCODE(SetScc, Void, U1, )
 OPCODE(SetExec, Void, U1, )
 OPCODE(SetVcc, Void, U1, )
+OPCODE(SetSccLo, Void, U32, )
 OPCODE(SetVccLo, Void, U32, )
 OPCODE(SetVccHi, Void, U32, )
@@ -148,6 +169,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
 OPCODE(FPSqrt, F32, F32, )
 OPCODE(FPSin, F32, F32, )
 OPCODE(FPExp2, F32, F32, )
+OPCODE(FPLdexp, F32, F32, U32, )
 OPCODE(FPCos, F32, F32, )
 OPCODE(FPLog2, F32, F32, )
 OPCODE(FPSaturate32, F32, F32, )
@@ -190,10 +212,13 @@ OPCODE(FPUnordGreaterThanEqual32, U1, F32,
 OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
 OPCODE(FPIsNan32, U1, F32, )
 OPCODE(FPIsNan64, U1, F64, )
+OPCODE(FPIsInf32, U1, F32, )
+OPCODE(FPIsInf64, U1, F64, )

 // Integer operations
 OPCODE(IAdd32, U32, U32, U32, )
 OPCODE(IAdd64, U64, U64, U64, )
+OPCODE(IAddCary32, U32x2, U32, U32, )
 OPCODE(ISub32, U32, U32, U32, )
 OPCODE(ISub64, U64, U64, U64, )
 OPCODE(IMul32, U32, U32, U32, )
@@ -258,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32,
 OPCODE(ConvertF64S32, F64, U32, )
 OPCODE(ConvertF64U32, F64, U32, )
 OPCODE(ConvertF32U16, F32, U16, )
+OPCODE(ConvertU16U32, U16, U32, )

 // Image operations
 OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
@@ -273,6 +299,19 @@ OPCODE(ImageGradient, F32x4, Opaq
 OPCODE(ImageRead, U32x4, Opaque, Opaque, )
 OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )

+// Image atomic operations
+OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
+
 // Warp operations
 OPCODE(LaneId, U32, )
 OPCODE(QuadShuffle, U32, U32, U32 )


@@ -324,8 +324,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::BitFieldUExtract:
         FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
             if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
-                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
-                                 base, shift, count);
+                UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+                                base, shift, count);
             }
             return (base >> shift) & ((1U << count) - 1);
         });
@@ -336,8 +336,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
             const size_t left_shift{32 - back_shift};
             const size_t right_shift{static_cast<size_t>(32 - count)};
             if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
-                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
-                                 base, shift, count);
+                UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+                                base, shift, count);
             }
             return static_cast<u32>((base << left_shift) >> right_shift);
         });
@@ -345,8 +345,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::BitFieldInsert:
         FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
             if (bits >= 32 || offset >= 32) {
-                throw LogicError("Undefined result in {}({}, {}, {}, {})",
-                                 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
+                UNREACHABLE_MSG("Undefined result in {}({}, {}, {}, {})",
+                                IR::Opcode::BitFieldInsert, base, insert, offset, bits);
             }
             return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
         });


@@ -89,6 +89,17 @@ bool IsImageInstruction(const IR::Inst& inst) {
     case IR::Opcode::ImageGradient:
     case IR::Opcode::ImageRead:
     case IR::Opcode::ImageWrite:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
        return true;
     default:
         return false;
@@ -99,6 +110,17 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::ImageWrite:
     case IR::Opcode::ImageRead:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
         return true;
     default:
         return false;
@@ -115,7 +137,8 @@ public:
     u32 Add(const BufferResource& desc) {
         const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
             return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset;
+                   desc.dword_offset == existing.dword_offset &&
+                   desc.inline_cbuf == existing.inline_cbuf;
         })};
         auto& buffer = buffer_resources[index];
         ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
@@ -196,20 +219,70 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
     };
 }

+static constexpr size_t MaxUboSize = 65536;
+
+s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
+                        AmdGpu::Buffer& cbuf) {
+    // Assuming V# is in UD s[32:35]
+    // The next pattern:
+    //  s_getpc_b64     s[32:33]
+    //  s_add_u32       s32, <const>, s32
+    //  s_addc_u32      s33, 0, s33
+    //  s_mov_b32       s35, <const>
+    //  s_movk_i32      s34, <const>
+    //  buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
+    // is used to define an inline constant buffer
+    IR::Inst* handle = inst.Arg(0).InstRecursive();
+    IR::Inst* p0 = handle->Arg(0).InstRecursive();
+    if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
+        !p0->Arg(1).IsImmediate()) {
+        return -1;
+    }
+    IR::Inst* p1 = handle->Arg(1).InstRecursive();
+    if (p1->GetOpcode() != IR::Opcode::IAdd32) {
+        return -1;
+    }
+    if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
+        return -1;
+    }
+    // We have found this pattern. Build the sharp.
+    std::array<u64, 2> buffer;
+    buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
+    buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
+    cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
+    // Assign a binding to this sharp.
+    return descriptors.Add(BufferResource{
+        .sgpr_base = std::numeric_limits<u32>::max(),
+        .dword_offset = 0,
+        .stride = cbuf.GetStride(),
+        .num_records = u32(cbuf.num_records),
+        .used_types = BufferDataType(inst),
+        .inline_cbuf = cbuf,
+        .is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
+    });
+}
+
 void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                             Descriptors& descriptors) {
-    static constexpr size_t MaxUboSize = 65536;
-    IR::Inst* producer = inst.Arg(0).InstRecursive();
-    const auto sharp = TrackSharp(producer);
-    const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
-    const u32 binding = descriptors.Add(BufferResource{
-        .sgpr_base = sharp.sgpr_base,
-        .dword_offset = sharp.dword_offset,
-        .stride = buffer.GetStride(),
-        .num_records = u32(buffer.num_records),
-        .used_types = BufferDataType(inst),
-        .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
-    });
+    s32 binding{};
+    AmdGpu::Buffer buffer;
+    if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
+        IR::Inst* handle = inst.Arg(0).InstRecursive();
+        IR::Inst* producer = handle->Arg(0).InstRecursive();
+        const auto sharp = TrackSharp(producer);
+        buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
+        binding = descriptors.Add(BufferResource{
+            .sgpr_base = sharp.sgpr_base,
+            .dword_offset = sharp.dword_offset,
+            .stride = buffer.GetStride(),
+            .num_records = u32(buffer.num_records),
+            .used_types = BufferDataType(inst),
+            .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
+        });
+    }
     const auto inst_info = inst.Flags<IR::BufferInstInfo>();
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     // Replace handle with binding index in buffer resource list.
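
To recap what TryHandleInlineCbuf assembles (a reading of the code above; the field placement assumes the usual GCN V# packing as modeled in video_core/amdgpu):

    // buffer[0]: base address = pgm_base + the two s_add_u32 immediates
    //            (the compile-time equivalent of the s_getpc_b64/s_add_u32 pair)
    // buffer[1]: low dword  = the s34 immediate (num_records)
    //            high dword = the s35 immediate (stride and format bits)
    // sgpr_base is set to u32 max as a sentinel meaning "no user-data slot";
    // the rebuilt descriptor travels in BufferResource::inline_cbuf instead.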
@@ -217,7 +290,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
     if (inst_info.is_typed) {
         ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
-               inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
+               (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32));
     }
     if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
         inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {

@@ -16,6 +16,16 @@ void Visit(Info& info, IR::Inst& inst) {
         info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
         break;
     }
+    case IR::Opcode::LoadSharedS8:
+    case IR::Opcode::LoadSharedU8:
+    case IR::Opcode::WriteSharedU8:
+        info.uses_shared_u8 = true;
+        break;
+    case IR::Opcode::LoadSharedS16:
+    case IR::Opcode::LoadSharedU16:
+    case IR::Opcode::WriteSharedU16:
+        info.uses_shared_u16 = true;
+        break;
     case IR::Opcode::QuadShuffle:
         info.uses_group_quad = true;
         break;
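
A plausible consequence (an assumption; the backend is not part of this diff): tracking uses_shared_u8/uses_shared_u16 lets the SPIR-V emitter decide whether to declare true 8/16-bit workgroup storage or widen such accesses, along these lines:

    // Hypothetical backend check; `profile.support_int8` is illustrative.
    if (info.uses_shared_u8 && !profile.support_int8) {
        // Widen byte accesses to 32-bit loads plus bitfield extracts.
    }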


@@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
 struct ExecFlagTag : FlagTag {};
 struct VccFlagTag : FlagTag {};
 struct VccLoTag : FlagTag {};
+struct SccLoTag : FlagTag {};
 struct VccHiTag : FlagTag {};

 struct GotoVariable : FlagTag {
@@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
 };

 using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
-                             VccFlagTag, VccLoTag, VccHiTag>;
+                             VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
 using ValueMap = std::unordered_map<IR::Block*, IR::Value>;

 struct DefTable {
@@ -83,6 +84,13 @@ struct DefTable {
         exec_flag.insert_or_assign(block, value);
     }

+    const IR::Value& Def(IR::Block* block, SccLoTag) {
+        return scc_lo_flag[block];
+    }
+    void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
+        scc_lo_flag.insert_or_assign(block, value);
+    }
+
     const IR::Value& Def(IR::Block* block, VccLoTag) {
         return vcc_lo_flag[block];
     }
@@ -108,6 +116,7 @@ struct DefTable {
     ValueMap scc_flag;
     ValueMap exec_flag;
     ValueMap vcc_flag;
+    ValueMap scc_lo_flag;
     ValueMap vcc_lo_flag;
     ValueMap vcc_hi_flag;
 };
@@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
     return IR::Opcode::UndefU32;
 }

+IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
 IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
     return IR::Opcode::UndefU32;
 }
@@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::SetVcc:
         pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
         break;
+    case IR::Opcode::SetSccLo:
+        pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
+        break;
     case IR::Opcode::SetVccLo:
         pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
         break;
@@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::GetVcc:
         inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
         break;
+    case IR::Opcode::GetSccLo:
+        inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
+        break;
     case IR::Opcode::GetVccLo:
         inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
         break;


@@ -14,7 +14,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) {
     BlockList post_order_blocks;

     if (root.type != AbstractSyntaxNode::Type::Block) {
-        throw LogicError("First node in abstract syntax list root is not a block");
+        UNREACHABLE_MSG("First node in abstract syntax list root is not a block");
     }
     Block* const first_block{root.data.block};
     visited.insert(first_block);


@@ -3,9 +3,9 @@

 #pragma once

+#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/types.h"
-#include "shader_recompiler/exception.h"
 #include "video_core/amdgpu/pixel_format.h"

 namespace Shader::IR {
@@ -428,10 +428,10 @@ template <RegT Reg>
 [[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
     const int result{static_cast<int>(reg) + num};
     if (result >= static_cast<int>(Reg::Max)) {
-        throw LogicError("Overflow on register arithmetic");
+        UNREACHABLE_MSG("Overflow on register arithmetic");
     }
     if (result < 0) {
-        throw LogicError("Underflow on register arithmetic");
+        UNREACHABLE_MSG("Underflow on register arithmetic");
     }
     return static_cast<Reg>(result);
 }


@@ -83,7 +83,7 @@ bool Value::operator==(const Value& other) const {
     case Type::F64x4:
         break;
     }
-    throw LogicError("Invalid type {}", type);
+    UNREACHABLE_MSG("Invalid type {}", type);
 }

 bool Value::operator!=(const Value& other) const {