video_core: Bloodborne stabilization pt1 (#543)

* shader_recompiler: Writelane elimination pass + null image fix

* spirv: Implement image derivatives

* texture_cache: Reduce page bit size

* clang format

* slot_vector: Back to debug assert

* vk_graphics_pipeline: Handle null tsharp

* spirv: Revert some change

* vk_instance: Support primitive restart on list topology

* page_manager: Adjust windows exception handler

* clang format

* Remove subres tracking

* Will be done separately
This commit is contained in:
TheTurtle 2024-08-24 22:51:47 +03:00 committed by GitHub
parent 9e4fc17e6c
commit c79b10edc1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 187 additions and 107 deletions

View file

@ -209,10 +209,6 @@ U1 IREmitter::GetVcc() {
return Inst<U1>(Opcode::GetVcc);
}
// Emits a GetSccLo instruction with no operands; the result is typed as U32.
U32 IREmitter::GetSccLo() {
return Inst<U32>(Opcode::GetSccLo);
}
// Emits a GetVccLo instruction with no operands; the result is typed as U32.
U32 IREmitter::GetVccLo() {
return Inst<U32>(Opcode::GetVccLo);
}
@ -445,6 +441,18 @@ U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
return Inst<U32>(Opcode::QuadShuffle, value, index);
}
// Emits a ReadFirstLane instruction with `value` as its single operand and returns the
// U32 result of that instruction.
U32 IREmitter::ReadFirstLane(const U32& value) {
return Inst<U32>(Opcode::ReadFirstLane, value);
}
// Emits a ReadLane instruction and returns its U32 result.
// Operand order is (value, lane): operand 0 is the value being read, operand 1 the lane index.
U32 IREmitter::ReadLane(const U32& value, const U32& lane) {
return Inst<U32>(Opcode::ReadLane, value, lane);
}
// Emits a WriteLane instruction and returns its U32 result.
// Operand order is (value, write_value, lane): operand 0 is the incoming value, operand 1 the
// value to write and operand 2 the lane index. Passes that inspect WriteLane rely on this order.
U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& lane) {
return Inst<U32>(Opcode::WriteLane, value, write_value, lane);
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());

View file

@ -65,7 +65,6 @@ public:
[[nodiscard]] U1 GetScc();
[[nodiscard]] U1 GetExec();
[[nodiscard]] U1 GetVcc();
[[nodiscard]] U32 GetSccLo();
[[nodiscard]] U32 GetVccLo();
[[nodiscard]] U32 GetVccHi();
void SetScc(const U1& value);
@ -122,6 +121,9 @@ public:
[[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
[[nodiscard]] U32 ReadFirstLane(const U32& value);
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);

View file

@ -58,7 +58,6 @@ OPCODE(SetAttribute, Void, Attr
OPCODE(GetScc, U1, Void, )
OPCODE(GetExec, U1, Void, )
OPCODE(GetVcc, U1, Void, )
OPCODE(GetSccLo, U32, Void, )
OPCODE(GetVccLo, U32, Void, )
OPCODE(GetVccHi, U32, Void, )
OPCODE(SetScc, Void, U1, )
@ -330,19 +329,22 @@ OPCODE(ImageRead, U32x4, Opaq
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Image atomic operations
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(WarpId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )
// ReadFirstLane has exactly one operand, matching the one-argument IREmitter::ReadFirstLane(value).
OPCODE(ReadFirstLane, U32, U32, )
OPCODE(ReadLane, U32, U32, U32 )
OPCODE(WriteLane, U32, U32, U32, U32 )

View file

@ -250,6 +250,18 @@ void FoldCmpClass(IR::Inst& inst) {
}
}
// Folds a ReadLane whose source value comes from a chain of WriteLane instructions.
//
// Starting at the producer of operand 0, the chain of WriteLane instructions is walked
// backwards through each WriteLane's operand 0. When a WriteLane targeting the same lane as
// this ReadLane is found, every use of the ReadLane is replaced with the value that WriteLane
// stored (its operand 1). If the chain ends without a match the instruction is left untouched.
//
// Lane indices are compared as constants, so the fold gives up (leaving `inst` unchanged) as
// soon as an operand is not in the form it needs: the lane operands must be immediates and the
// chained value operands must be produced by instructions.
void FoldReadLane(IR::Inst& inst) {
    const IR::Value source = inst.Arg(0);
    const IR::Value read_lane = inst.Arg(1);
    // A constant source has no producer to walk, and a lane that is only known at run time
    // cannot be matched against the writes.
    if (source.IsImmediate() || !read_lane.IsImmediate()) {
        return;
    }
    const u32 lane = read_lane.U32();
    IR::Inst* prod = source.InstRecursive();
    while (prod->GetOpcode() == IR::Opcode::WriteLane) {
        const IR::Value write_lane = prod->Arg(2);
        if (!write_lane.IsImmediate()) {
            // This write may or may not hit `lane`; looking past it could yield a stale value.
            return;
        }
        if (write_lane.U32() == lane) {
            inst.ReplaceUsesWith(prod->Arg(1));
            return;
        }
        const IR::Value previous = prod->Arg(0);
        if (previous.IsImmediate()) {
            // End of the instruction chain: nothing further to inspect.
            return;
        }
        prod = previous.InstRecursive();
    }
}
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::IAdd32:
@ -289,6 +301,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::SelectF32:
case IR::Opcode::SelectF64:
return FoldSelect(inst);
case IR::Opcode::ReadLane:
return FoldReadLane(inst);
case IR::Opcode::FPNeg32:
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
return;

View file

@ -345,6 +345,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
// Retrieve SGPR pair that holds sbase
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
ASSERT(inst->GetOpcode() != IR::Opcode::ReadConst);
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst->Arg(0).ScalarReg();
}
@ -402,24 +403,13 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
// is used to define an inline constant buffer
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* p0 = handle->Arg(0).InstRecursive();
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
!p0->Arg(1).IsImmediate()) {
return -1;
}
IR::Inst* p1 = handle->Arg(1).InstRecursive();
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
return -1;
}
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
if (!handle->AreAllArgsImmediates()) {
return -1;
}
// We have found this pattern. Build the sharp.
std::array<u32, 4> buffer;
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
buffer[1] = 0;
buffer[2] = handle->Arg(2).U32();
buffer[3] = handle->Arg(3).U32();
std::array<u64, 2> buffer;
buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp.
return descriptors.Add(BufferResource{
@ -617,7 +607,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
const IR::Value arg = inst.Arg(arg_pos);
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
const auto read = [&](u32 offset) -> auto {
const auto read = [&](u32 offset) -> IR::U32 {
if (arg.IsImmediate()) {
const u16 comp = (arg.U32() >> offset) & 0x3F;
return ir.Imm32(s32(comp << 26) >> 26);
}
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
};
@ -637,7 +631,10 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
UNREACHABLE();
}
}
if (inst_info.has_derivatives) {
ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D,
"User derivatives only supported for 2D images");
}
if (inst_info.has_lod_clamp) {
const u32 arg_pos = [&]() -> u32 {
switch (inst.GetOpcode()) {

View file

@ -32,7 +32,6 @@ struct SccFlagTag : FlagTag {};
struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {};
struct SccLoTag : FlagTag {};
struct VccHiTag : FlagTag {};
struct GotoVariable : FlagTag {
@ -45,7 +44,7 @@ struct GotoVariable : FlagTag {
};
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
VccFlagTag, VccLoTag, VccHiTag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable {
@ -84,13 +83,6 @@ struct DefTable {
exec_flag.insert_or_assign(block, value);
}
// Returns a reference to the scc_lo_flag entry keyed by `block`.
// NOTE(review): lookup uses operator[]; if scc_lo_flag is a ValueMap (std::unordered_map), a
// block with no recorded definition gets a default-constructed entry inserted — confirm.
const IR::Value& Def(IR::Block* block, SccLoTag) {
return scc_lo_flag[block];
}
// Stores `value` in scc_lo_flag under `block`, replacing any entry already present for it.
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
scc_lo_flag.insert_or_assign(block, value);
}
// Returns a reference to the vcc_lo_flag entry keyed by `block`.
// NOTE(review): lookup uses operator[]; if vcc_lo_flag is a ValueMap (std::unordered_map), a
// block with no recorded definition gets a default-constructed entry inserted — confirm.
const IR::Value& Def(IR::Block* block, VccLoTag) {
return vcc_lo_flag[block];
}
@ -133,10 +125,6 @@ IR::Opcode UndefOpcode(const VccLoTag) noexcept {
return IR::Opcode::UndefU32;
}
// Selects the undefined-value opcode for the SccLo tag: always IR::Opcode::UndefU32.
IR::Opcode UndefOpcode(const SccLoTag) noexcept {
return IR::Opcode::UndefU32;
}
// Selects the undefined-value opcode for the VccHi tag: always IR::Opcode::UndefU32.
IR::Opcode UndefOpcode(const VccHiTag) noexcept {
return IR::Opcode::UndefU32;
}
@ -336,9 +324,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetVcc:
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetSccLo:
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetVccLo:
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
break;
@ -371,9 +356,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetVcc:
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
break;
case IR::Opcode::GetSccLo:
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
break;
case IR::Opcode::GetVccLo:
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
break;

View file

@ -58,6 +58,7 @@ union TextureInstInfo {
BitField<4, 1, u32> explicit_lod;
BitField<5, 1, u32> has_offset;
BitField<6, 2, u32> gather_comp;
BitField<8, 1, u32> has_derivatives;
};
union BufferInstInfo {