video_core: Preliminary storage image support and more (#188)

* vk_rasterizer: Clear depth buffer when DB_RENDER_CONTROL says so

* video_core: Preliminary storage image support, more opcodes

* renderer_vulkan: a fix for vertex buffers merging

* renderer_vulkan: a heuristic for blend override when alpha out is masked

---------

Co-authored-by: psucien <bad_cast@protonmail.com>
This commit is contained in:
TheTurtle 2024-06-10 22:35:14 +03:00 committed by GitHub
parent 23f11a3fda
commit 7b1a317b09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 429 additions and 101 deletions

View file

@ -318,6 +318,14 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
}
}
// Appends a LaneId instruction (no operands) and hands back its U32 result.
// NOTE(review): presumably this is the invocation's index within its wave/subgroup;
// the exact semantics are defined by the backend that lowers the opcode — confirm there.
U32 IREmitter::LaneId() {
    U32 lane_id = Inst<U32>(Opcode::LaneId);
    return lane_id;
}
// Appends a QuadShuffle instruction with operands (value, index) and hands back its
// U32 result.
// NOTE(review): presumably `index` selects which lane of the quad `value` is read
// from — confirm against the backend implementation of the opcode.
U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
    U32 shuffled = Inst<U32>(Opcode::QuadShuffle, value, index);
    return shuffled;
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());

View file

@ -85,12 +85,8 @@ public:
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
BufferInstInfo info);
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
[[nodiscard]] U1 GetSignFromOp(const Value& op);
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
[[nodiscard]] U1 GetOverflowFromOp(const Value& op);
[[nodiscard]] U1 GetSparseFromOp(const Value& op);
[[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
[[nodiscard]] U32 LaneId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);

View file

@ -52,6 +52,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferU32:
case Opcode::ImageWrite:
return true;
default:
return false;

View file

@ -269,3 +269,7 @@ OPCODE(ImageQueryLod, F32x4, Opaq
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )

View file

@ -93,6 +93,16 @@ bool IsImageInstruction(const IR::Inst& inst) {
}
}
// Tells whether `inst` is an image instruction that accesses its image as a storage
// image. Only the ImageRead and ImageWrite opcodes qualify; every other opcode
// yields false.
bool IsImageStorageInstruction(const IR::Inst& inst) {
    const auto opcode = inst.GetOpcode();
    return opcode == IR::Opcode::ImageWrite || opcode == IR::Opcode::ImageRead;
}
class Descriptors {
public:
explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_,
@ -241,32 +251,42 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
IR::Inst* producer = inst.Arg(0).InstRecursive();
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
producer->GetOpcode() == IR::Opcode::GetUserData);
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
return std::make_pair(producer->Arg(0).InstRecursive(),
producer->Arg(1).InstRecursive());
}
return std::make_pair(producer, nullptr);
}();
// Read image sharp.
const auto tsharp = TrackSharp(producer->Arg(0).InstRecursive());
const auto tsharp = TrackSharp(tsharp_handle);
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const u32 image_binding = descriptors.Add(ImageResource{
u32 image_binding = descriptors.Add(ImageResource{
.sgpr_base = tsharp.sgpr_base,
.dword_offset = tsharp.dword_offset,
.type = image.type,
.nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()),
.is_storage = false,
.is_storage = IsImageStorageInstruction(inst),
.is_depth = bool(inst_info.is_depth),
});
// Read sampler sharp.
const auto ssharp = TrackSharp(producer->Arg(1).InstRecursive());
const u32 sampler_binding = descriptors.Add(SamplerResource{
.sgpr_base = ssharp.sgpr_base,
.dword_offset = ssharp.dword_offset,
});
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
if (ssharp_handle) {
const auto ssharp = TrackSharp(ssharp_handle);
const u32 sampler_binding = descriptors.Add(SamplerResource{
.sgpr_base = ssharp.sgpr_base,
.dword_offset = ssharp.dword_offset,
});
image_binding |= (sampler_binding << 16);
}
// Patch image handle
const u32 handle = image_binding | (sampler_binding << 16);
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(handle));
inst.SetArg(0, ir.Imm32(image_binding));
// Now that we know the image type, adjust texture coordinate vector.
const IR::Inst* body = inst.Arg(1).InstRecursive();
@ -283,7 +303,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
case AmdGpu::ImageType::Cube:
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
default:
UNREACHABLE();
UNREACHABLE_MSG("Unknown image type {}", image.type.Value());
}
}();
inst.SetArg(1, coords);
@ -293,6 +313,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
inst.SetArg(arg_pos, arg);
}
if (inst_info.explicit_lod && inst.GetOpcode() == IR::Opcode::ImageFetch) {
inst.SetArg(3, arg);
}
}
void ResourceTrackingPass(IR::Program& program) {

View file

@ -16,6 +16,9 @@ void Visit(Info& info, IR::Inst& inst) {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
case IR::Opcode::QuadShuffle:
info.uses_group_quad = true;
break;
default:
break;
}