mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-24 12:25:00 +00:00
shader_recompiler: Implement data share append and consume operations (#814)
* shader_recompiler: Add more format swap modes * texture_cache: Handle stencil texture reads * emulator: Support loading font library * readme: Add thanks section * shader_recompiler: Constant buffers as integers * shader_recompiler: Typed buffers as integers * shader_recompiler: Separate thread bit scalars * We can assume guest shader never mixes them with normal sgprs. This helps avoid errors where ssa could view an sgpr write dominating a thread bit read, due to how control flow is structurized, even though its not possible in actual control flow * shader_recompiler: Implement data append/consume operations * clang format * buffer_cache: Simplify invalidation scheme * video_core: Remove some invalidation remnants * adjust
This commit is contained in:
parent
649527a235
commit
13743b27fc
34 changed files with 512 additions and 272 deletions
|
@ -147,6 +147,7 @@ public:
|
|||
|
||||
/// Intrusively store the value of a register in the block.
|
||||
std::array<Value, NumScalarRegs> ssa_sreg_values;
|
||||
std::array<Value, NumScalarRegs> ssa_sbit_values;
|
||||
std::array<Value, NumVectorRegs> ssa_vreg_values;
|
||||
|
||||
bool has_multiple_predecessors{false};
|
||||
|
|
|
@ -313,21 +313,21 @@ U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
|||
return Inst<U32>(Opcode::ReadConst, base, offset);
|
||||
}
|
||||
|
||||
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
||||
return Inst<F32>(Opcode::ReadConstBuffer, handle, index);
|
||||
U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
||||
return Inst<U32>(Opcode::ReadConstBuffer, handle, index);
|
||||
}
|
||||
|
||||
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
BufferInstInfo info) {
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address);
|
||||
return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address);
|
||||
case 2:
|
||||
return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address);
|
||||
return Inst(Opcode::LoadBufferU32x2, Flags{info}, handle, address);
|
||||
case 3:
|
||||
return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address);
|
||||
return Inst(Opcode::LoadBufferU32x3, Flags{info}, handle, address);
|
||||
case 4:
|
||||
return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address);
|
||||
return Inst(Opcode::LoadBufferU32x4, Flags{info}, handle, address);
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
|
||||
}
|
||||
|
@ -341,17 +341,16 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
|
|||
const Value& data, BufferInstInfo info) {
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32,
|
||||
Flags{info}, handle, address, data);
|
||||
Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 2:
|
||||
Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data);
|
||||
Inst(Opcode::StoreBufferU32x2, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 3:
|
||||
Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data);
|
||||
Inst(Opcode::StoreBufferU32x3, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 4:
|
||||
Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data);
|
||||
Inst(Opcode::StoreBufferU32x4, Flags{info}, handle, address, data);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
|
||||
|
@ -410,6 +409,14 @@ void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, con
|
|||
Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
|
||||
}
|
||||
|
||||
U32 IREmitter::DataAppend(const U32& counter) {
|
||||
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
|
||||
}
|
||||
|
||||
U32 IREmitter::DataConsume(const U32& counter) {
|
||||
return Inst<U32>(Opcode::DataConsume, counter, Imm32(0));
|
||||
}
|
||||
|
||||
U32 IREmitter::LaneId() {
|
||||
return Inst<U32>(Opcode::LaneId);
|
||||
}
|
||||
|
|
|
@ -90,7 +90,7 @@ public:
|
|||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
|
||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
BufferInstInfo info);
|
||||
|
@ -120,6 +120,8 @@ public:
|
|||
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U32 DataAppend(const U32& counter);
|
||||
[[nodiscard]] U32 DataConsume(const U32& counter);
|
||||
[[nodiscard]] U32 LaneId();
|
||||
[[nodiscard]] U32 WarpId();
|
||||
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
||||
|
|
|
@ -51,12 +51,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::Discard:
|
||||
case Opcode::DiscardCond:
|
||||
case Opcode::SetAttribute:
|
||||
case Opcode::StoreBufferF32:
|
||||
case Opcode::StoreBufferF32x2:
|
||||
case Opcode::StoreBufferF32x3:
|
||||
case Opcode::StoreBufferF32x4:
|
||||
case Opcode::StoreBufferFormatF32:
|
||||
case Opcode::StoreBufferU32:
|
||||
case Opcode::StoreBufferU32x2:
|
||||
case Opcode::StoreBufferU32x3:
|
||||
case Opcode::StoreBufferU32x4:
|
||||
case Opcode::StoreBufferFormatF32:
|
||||
case Opcode::BufferAtomicIAdd32:
|
||||
case Opcode::BufferAtomicSMin32:
|
||||
case Opcode::BufferAtomicUMin32:
|
||||
|
@ -68,6 +67,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::BufferAtomicOr32:
|
||||
case Opcode::BufferAtomicXor32:
|
||||
case Opcode::BufferAtomicSwap32:
|
||||
case Opcode::DataAppend:
|
||||
case Opcode::DataConsume:
|
||||
case Opcode::WriteSharedU128:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::WriteSharedU32:
|
||||
|
|
|
@ -17,8 +17,7 @@ OPCODE(DiscardCond, Void, U1,
|
|||
|
||||
// Constant memory operations
|
||||
OPCODE(ReadConst, U32, U32x2, U32, )
|
||||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
||||
OPCODE(ReadConstBuffer, U32, Opaque, U32, )
|
||||
|
||||
// Barriers
|
||||
OPCODE(Barrier, Void, )
|
||||
|
@ -77,21 +76,19 @@ OPCODE(UndefU32, U32,
|
|||
OPCODE(UndefU64, U64, )
|
||||
|
||||
// Buffer operations
|
||||
OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
|
||||
OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
|
||||
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
|
||||
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
|
||||
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
|
||||
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
|
||||
OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, )
|
||||
OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, )
|
||||
OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, )
|
||||
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, )
|
||||
|
||||
// Buffer atomic operations
|
||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
|
||||
|
@ -101,7 +98,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq
|
|||
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
|
@ -345,3 +342,5 @@ OPCODE(QuadShuffle, U32, U32,
|
|||
OPCODE(ReadFirstLane, U32, U32, )
|
||||
OPCODE(ReadLane, U32, U32, U32 )
|
||||
OPCODE(WriteLane, U32, U32, U32, U32 )
|
||||
OPCODE(DataAppend, U32, U32, U32 )
|
||||
OPCODE(DataConsume, U32, U32, U32 )
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
|
@ -42,11 +41,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
|
|||
|
||||
bool IsBufferStore(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4:
|
||||
return true;
|
||||
default:
|
||||
return IsBufferAtomic(inst);
|
||||
|
@ -55,25 +53,28 @@ bool IsBufferStore(const IR::Inst& inst) {
|
|||
|
||||
bool IsBufferInstruction(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferF32:
|
||||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::LoadBufferU32x2:
|
||||
case IR::Opcode::LoadBufferU32x3:
|
||||
case IR::Opcode::LoadBufferU32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return IsBufferStore(inst);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsDataRingInstruction(const IR::Inst& inst) {
|
||||
return inst.GetOpcode() == IR::Opcode::DataAppend ||
|
||||
inst.GetOpcode() == IR::Opcode::DataConsume;
|
||||
}
|
||||
|
||||
bool IsTextureBufferInstruction(const IR::Inst& inst) {
|
||||
return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
|
||||
inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
|
||||
}
|
||||
|
||||
static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||
bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||
switch (num_format) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
switch (data_format) {
|
||||
|
@ -98,19 +99,15 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for
|
|||
|
||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferF32:
|
||||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
return IR::Type::F32;
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::LoadBufferU32x2:
|
||||
case IR::Opcode::LoadBufferU32x3:
|
||||
case IR::Opcode::LoadBufferU32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::BufferAtomicIAdd32:
|
||||
case IR::Opcode::BufferAtomicSwap32:
|
||||
return IR::Type::U32;
|
||||
|
@ -191,6 +188,10 @@ public:
|
|||
|
||||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
// Only one GDS binding can exist.
|
||||
if (desc.is_gds_buffer && existing.is_gds_buffer) {
|
||||
return true;
|
||||
}
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset &&
|
||||
desc.inline_cbuf == existing.inline_cbuf;
|
||||
|
@ -399,8 +400,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emittion time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -609,6 +609,51 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
|
@ -625,6 +670,10 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingInstruction(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,8 +44,17 @@ struct GotoVariable : FlagTag {
|
|||
u32 index;
|
||||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, VccLoTag, VccHiTag, M0Tag>;
|
||||
struct ThreadBitScalar : FlagTag {
|
||||
ThreadBitScalar() = default;
|
||||
explicit ThreadBitScalar(IR::ScalarReg sgpr_) : sgpr{sgpr_} {}
|
||||
|
||||
auto operator<=>(const ThreadBitScalar&) const noexcept = default;
|
||||
|
||||
IR::ScalarReg sgpr;
|
||||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, ThreadBitScalar,
|
||||
SccFlagTag, ExecFlagTag, VccFlagTag, VccLoTag, VccHiTag, M0Tag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
|
@ -70,6 +79,13 @@ struct DefTable {
|
|||
goto_vars[variable.index].insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, ThreadBitScalar variable) {
|
||||
return block->ssa_sreg_values[RegIndex(variable.sgpr)];
|
||||
}
|
||||
void SetDef(IR::Block* block, ThreadBitScalar variable, const IR::Value& value) {
|
||||
block->ssa_sreg_values[RegIndex(variable.sgpr)] = value;
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, SccFlagTag) {
|
||||
return scc_flag[block];
|
||||
}
|
||||
|
@ -173,7 +189,7 @@ public:
|
|||
}
|
||||
|
||||
template <typename Type>
|
||||
IR::Value ReadVariable(Type variable, IR::Block* root_block, bool is_thread_bit = false) {
|
||||
IR::Value ReadVariable(Type variable, IR::Block* root_block) {
|
||||
boost::container::small_vector<ReadState<Type>, 64> stack{
|
||||
ReadState<Type>(nullptr),
|
||||
ReadState<Type>(root_block),
|
||||
|
@ -201,7 +217,7 @@ public:
|
|||
} else if (!block->IsSsaSealed()) {
|
||||
// Incomplete CFG
|
||||
IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
|
||||
phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable)));
|
||||
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
|
||||
|
||||
incomplete_phis[block].insert_or_assign(variable, phi);
|
||||
stack.back().result = IR::Value{&*phi};
|
||||
|
@ -214,7 +230,7 @@ public:
|
|||
} else {
|
||||
// Break potential cycles with operandless phi
|
||||
IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
|
||||
phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable)));
|
||||
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
|
||||
|
||||
WriteVariable(variable, block, IR::Value{phi});
|
||||
|
||||
|
@ -263,9 +279,7 @@ private:
|
|||
template <typename Type>
|
||||
IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
|
||||
for (IR::Block* const imm_pred : block->ImmPredecessors()) {
|
||||
const bool is_thread_bit =
|
||||
std::is_same_v<Type, IR::ScalarReg> && phi.Flags<IR::Type>() == IR::Type::U1;
|
||||
phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred, is_thread_bit));
|
||||
phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
|
||||
}
|
||||
return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
|
||||
}
|
||||
|
@ -313,7 +327,11 @@ private:
|
|||
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||
const IR::Opcode opcode{inst.GetOpcode()};
|
||||
switch (opcode) {
|
||||
case IR::Opcode::SetThreadBitScalarReg:
|
||||
case IR::Opcode::SetThreadBitScalarReg: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
pass.WriteVariable(ThreadBitScalar{reg}, block, inst.Arg(1));
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::SetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
pass.WriteVariable(reg, block, inst.Arg(1));
|
||||
|
@ -345,11 +363,15 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::SetM0:
|
||||
pass.WriteVariable(M0Tag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::GetThreadBitScalarReg:
|
||||
case IR::Opcode::GetThreadBitScalarReg: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
const IR::Value value = pass.ReadVariable(ThreadBitScalar{reg}, block);
|
||||
inst.ReplaceUsesWith(value);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::GetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg;
|
||||
const IR::Value value = pass.ReadVariable(reg, block, thread_bit);
|
||||
const IR::Value value = pass.ReadVariable(reg, block);
|
||||
inst.ReplaceUsesWith(value);
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue