mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-24 12:25:00 +00:00
video_core: Bloodborne stabilization pt1 (#543)
* shader_recompiler: Writelane elimination pass + null image fix * spirv: Implement image derivatives * texture_cache: Reduce page bit size * clang format * slot_vector: Back to debug assert * vk_graphics_pipeline: Handle null tsharp * spirv: Revert some change * vk_instance: Support primitive restart on list topology * page_manager: Adjust windows exception handler * clang format * Remove subres tracking * Will be done separately
This commit is contained in:
parent
9e4fc17e6c
commit
c79b10edc1
25 changed files with 187 additions and 107 deletions
|
@ -209,10 +209,6 @@ U1 IREmitter::GetVcc() {
|
|||
return Inst<U1>(Opcode::GetVcc);
|
||||
}
|
||||
|
||||
// Returns the U32 produced by emitting a GetSccLo instruction; the opcode takes no operands.
U32 IREmitter::GetSccLo() {
    constexpr Opcode op = Opcode::GetSccLo;
    return Inst<U32>(op);
}
|
||||
|
||||
// Returns the U32 produced by emitting a GetVccLo instruction; the opcode takes no operands.
U32 IREmitter::GetVccLo() {
    constexpr Opcode op = Opcode::GetVccLo;
    return Inst<U32>(op);
}
|
||||
|
@ -445,6 +441,18 @@ U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
|
|||
return Inst<U32>(Opcode::QuadShuffle, value, index);
|
||||
}
|
||||
|
||||
// Emits a ReadFirstLane instruction whose only operand is `value` and returns its U32 result.
U32 IREmitter::ReadFirstLane(const U32& value) {
    constexpr Opcode op = Opcode::ReadFirstLane;
    return Inst<U32>(op, value);
}
|
||||
|
||||
// Emits a ReadLane instruction and returns its U32 result.
//   value - operand 0, the value being read
//   lane  - operand 1, the lane selector
U32 IREmitter::ReadLane(const U32& value, const U32& lane) {
    constexpr Opcode op = Opcode::ReadLane;
    return Inst<U32>(op, value, lane);
}
|
||||
|
||||
// Emits a WriteLane instruction and returns its U32 result.
//   value       - operand 0, the value being updated
//   write_value - operand 1, the value to store
//   lane        - operand 2, the lane selector
U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& lane) {
    constexpr Opcode op = Opcode::WriteLane;
    return Inst<U32>(op, value, write_value, lane);
}
|
||||
|
||||
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
|
|
@ -65,7 +65,6 @@ public:
|
|||
[[nodiscard]] U1 GetScc();
|
||||
[[nodiscard]] U1 GetExec();
|
||||
[[nodiscard]] U1 GetVcc();
|
||||
[[nodiscard]] U32 GetSccLo();
|
||||
[[nodiscard]] U32 GetVccLo();
|
||||
[[nodiscard]] U32 GetVccHi();
|
||||
void SetScc(const U1& value);
|
||||
|
@ -122,6 +121,9 @@ public:
|
|||
[[nodiscard]] U32 LaneId();
|
||||
[[nodiscard]] U32 WarpId();
|
||||
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
||||
[[nodiscard]] U32 ReadFirstLane(const U32& value);
|
||||
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
|
||||
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
|
||||
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||
|
|
|
@ -58,7 +58,6 @@ OPCODE(SetAttribute, Void, Attr
|
|||
OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetExec, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
OPCODE(GetSccLo, U32, Void, )
|
||||
OPCODE(GetVccLo, U32, Void, )
|
||||
OPCODE(GetVccHi, U32, Void, )
|
||||
OPCODE(SetScc, Void, U1, )
|
||||
|
@ -330,19 +329,22 @@ OPCODE(ImageRead, U32x4, Opaq
|
|||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
||||
|
||||
// Image atomic operations
|
||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||
|
||||
// Warp operations
|
||||
OPCODE(LaneId, U32, )
|
||||
OPCODE(WarpId, U32, )
|
||||
OPCODE(QuadShuffle, U32, U32, U32 )
|
||||
// ReadFirstLane takes a single U32 operand (the value to read); IREmitter::ReadFirstLane
// only ever supplies that one argument, so the table must not declare a second operand.
OPCODE(ReadFirstLane, U32, U32, )
|
||||
OPCODE(ReadLane, U32, U32, U32 )
|
||||
OPCODE(WriteLane, U32, U32, U32, U32 )
|
||||
|
|
|
@ -250,6 +250,18 @@ void FoldCmpClass(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// Forwards a previously written lane value to a ReadLane.
//
// Starting from the value operand of `inst` (a ReadLane), walks backwards through the chain of
// WriteLane producers. If one of them wrote to the same lane that is being read, every use of
// `inst` is replaced with the value that WriteLane stored (its operand 1).
//
// Folding is only attempted when the lane selectors are immediates:
//  - a ReadLane with a non-immediate lane is left untouched;
//  - a WriteLane with a non-immediate lane stops the walk, because it may or may not have
//    overwritten the lane being read and looking past it could forward a stale value.
// The walk also stops at the first producer that is not a WriteLane, or when the source value
// is itself an immediate (there is no producing instruction to inspect).
void FoldReadLane(IR::Inst& inst) {
    const IR::Value lane_arg = inst.Arg(1);
    if (!lane_arg.IsImmediate()) {
        // U32() is only meaningful for an immediate operand.
        return;
    }
    const u32 lane = lane_arg.U32();

    IR::Value source = inst.Arg(0);
    while (!source.IsImmediate()) {
        IR::Inst* const prod = source.InstRecursive();
        if (prod->GetOpcode() != IR::Opcode::WriteLane) {
            return;
        }
        const IR::Value written_lane = prod->Arg(2);
        if (!written_lane.IsImmediate()) {
            // Unknown destination lane: cannot prove it differs from `lane`.
            return;
        }
        if (written_lane.U32() == lane) {
            inst.ReplaceUsesWith(prod->Arg(1));
            return;
        }
        // This write targeted a different lane; keep looking at the value it modified.
        source = prod->Arg(0);
    }
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::IAdd32:
|
||||
|
@ -289,6 +301,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::SelectF32:
|
||||
case IR::Opcode::SelectF64:
|
||||
return FoldSelect(inst);
|
||||
case IR::Opcode::ReadLane:
|
||||
return FoldReadLane(inst);
|
||||
case IR::Opcode::FPNeg32:
|
||||
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
|
||||
return;
|
||||
|
|
|
@ -345,6 +345,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
|
||||
// Retrieve SGPR pair that holds sbase
|
||||
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
||||
ASSERT(inst->GetOpcode() != IR::Opcode::ReadConst);
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst->Arg(0).ScalarReg();
|
||||
}
|
||||
|
@ -402,24 +403,13 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
|||
// is used to define an inline constant buffer
|
||||
|
||||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
IR::Inst* p0 = handle->Arg(0).InstRecursive();
|
||||
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
|
||||
!p0->Arg(1).IsImmediate()) {
|
||||
return -1;
|
||||
}
|
||||
IR::Inst* p1 = handle->Arg(1).InstRecursive();
|
||||
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
|
||||
return -1;
|
||||
}
|
||||
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
|
||||
if (!handle->AreAllArgsImmediates()) {
|
||||
return -1;
|
||||
}
|
||||
// We have found this pattern. Build the sharp.
|
||||
std::array<u32, 4> buffer;
|
||||
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
|
||||
buffer[1] = 0;
|
||||
buffer[2] = handle->Arg(2).U32();
|
||||
buffer[3] = handle->Arg(3).U32();
|
||||
std::array<u64, 2> buffer;
|
||||
buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
|
||||
buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
|
||||
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
|
||||
// Assign a binding to this sharp.
|
||||
return descriptors.Add(BufferResource{
|
||||
|
@ -617,7 +607,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
const IR::Value arg = inst.Arg(arg_pos);
|
||||
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
||||
|
||||
const auto read = [&](u32 offset) -> auto {
|
||||
const auto read = [&](u32 offset) -> IR::U32 {
|
||||
if (arg.IsImmediate()) {
|
||||
const u16 comp = (arg.U32() >> offset) & 0x3F;
|
||||
return ir.Imm32(s32(comp << 26) >> 26);
|
||||
}
|
||||
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
|
||||
};
|
||||
|
||||
|
@ -637,7 +631,10 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
if (inst_info.has_derivatives) {
|
||||
ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D,
|
||||
"User derivatives only supported for 2D images");
|
||||
}
|
||||
if (inst_info.has_lod_clamp) {
|
||||
const u32 arg_pos = [&]() -> u32 {
|
||||
switch (inst.GetOpcode()) {
|
||||
|
|
|
@ -32,7 +32,6 @@ struct SccFlagTag : FlagTag {};
|
|||
struct ExecFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
struct VccLoTag : FlagTag {};
|
||||
struct SccLoTag : FlagTag {};
|
||||
struct VccHiTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
|
@ -45,7 +44,7 @@ struct GotoVariable : FlagTag {
|
|||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
|
||||
VccFlagTag, VccLoTag, VccHiTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
|
@ -84,13 +83,6 @@ struct DefTable {
|
|||
exec_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
// Looks up the SccLo definition recorded for `block` in scc_lo_flag.
// NOTE(review): if scc_lo_flag is a ValueMap (std::unordered_map), operator[] inserts a
// default-constructed entry when `block` has none - confirm against the member declaration.
const IR::Value& Def(IR::Block* block, SccLoTag) {
    const IR::Value& current = scc_lo_flag[block];
    return current;
}
|
||||
// Records `value` as the SccLo definition for `block`, replacing any existing entry.
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
    auto& defs = scc_lo_flag;
    defs.insert_or_assign(block, value);
}
|
||||
|
||||
// Looks up the VccLo definition recorded for `block` in vcc_lo_flag.
// NOTE(review): if vcc_lo_flag is a ValueMap (std::unordered_map), operator[] inserts a
// default-constructed entry when `block` has none - confirm against the member declaration.
const IR::Value& Def(IR::Block* block, VccLoTag) {
    const IR::Value& current = vcc_lo_flag[block];
    return current;
}
|
||||
|
@ -133,10 +125,6 @@ IR::Opcode UndefOpcode(const VccLoTag) noexcept {
|
|||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
// Opcode used to represent an undefined SccLo value: a 32-bit undef.
IR::Opcode UndefOpcode(const SccLoTag) noexcept {
    constexpr IR::Opcode undef_op = IR::Opcode::UndefU32;
    return undef_op;
}
|
||||
|
||||
// Opcode used to represent an undefined VccHi value: a 32-bit undef.
IR::Opcode UndefOpcode(const VccHiTag) noexcept {
    constexpr IR::Opcode undef_op = IR::Opcode::UndefU32;
    return undef_op;
}
|
||||
|
@ -336,9 +324,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::SetVcc:
|
||||
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetSccLo:
|
||||
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetVccLo:
|
||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
|
@ -371,9 +356,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::GetVcc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetSccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetVccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||
break;
|
||||
|
|
|
@ -58,6 +58,7 @@ union TextureInstInfo {
|
|||
BitField<4, 1, u32> explicit_lod;
|
||||
BitField<5, 1, u32> has_offset;
|
||||
BitField<6, 2, u32> gather_comp;
|
||||
BitField<8, 1, u32> has_derivatives;
|
||||
};
|
||||
|
||||
union BufferInstInfo {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue