mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-23 03:45:00 +00:00
video_core: Add image support
This commit is contained in:
parent
729e166cd3
commit
d59b102b6f
48 changed files with 1264 additions and 259 deletions
|
@ -110,6 +110,8 @@ std::string NameOf(Attribute attribute) {
|
|||
return "InstanceId";
|
||||
case Attribute::FragCoord:
|
||||
return "FragCoord";
|
||||
case Attribute::IsFrontFace:
|
||||
return "IsFrontFace";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -227,14 +227,8 @@ U32 IREmitter::ReadConst(const U64& address, const U32& offset) {
|
|||
return Inst<U32>(Opcode::ReadConst, address, offset);
|
||||
}
|
||||
|
||||
template <>
|
||||
U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) {
|
||||
return Inst<U32>(Opcode::ReadConstBuffer, handle, index, offset);
|
||||
}
|
||||
|
||||
template <>
|
||||
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) {
|
||||
return Inst<F32>(Opcode::ReadConstBufferF32, handle, index, offset);
|
||||
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
||||
return Inst<F32>(Opcode::ReadConstBuffer, handle, index);
|
||||
}
|
||||
|
||||
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
|
|
|
@ -68,8 +68,7 @@ public:
|
|||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset);
|
||||
template <typename T = U32>
|
||||
[[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset);
|
||||
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
|
||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
BufferInstInfo info);
|
||||
|
|
|
@ -15,8 +15,7 @@ OPCODE(Epilogue, Void,
|
|||
|
||||
// Constant memory operations
|
||||
OPCODE(ReadConst, U32, U64, U32, )
|
||||
OPCODE(ReadConstBuffer, U32, Opaque, U32, U32 )
|
||||
OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 )
|
||||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
|
|
|
@ -88,15 +88,15 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||
return;
|
||||
}
|
||||
if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
// Replace the bitcast with a typed constant buffer read
|
||||
inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
// if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||
// if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
// // Replace the bitcast with a typed constant buffer read
|
||||
// inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
||||
// inst.SetArg(0, arg_inst->Arg(0));
|
||||
// inst.SetArg(1, arg_inst->Arg(1));
|
||||
// return;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
||||
|
|
|
@ -28,7 +28,6 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
|||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::ReadConstBufferF32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -41,6 +40,7 @@ IR::Type BufferLoadType(const IR::Inst& inst) {
|
|||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
return IR::Type::F32;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -69,8 +69,10 @@ bool IsImageInstruction(const IR::Inst& inst) {
|
|||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(BufferResourceList& buffer_resources_)
|
||||
: buffer_resources{buffer_resources_} {}
|
||||
explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_,
|
||||
SamplerResourceList& sampler_resources_)
|
||||
: buffer_resources{buffer_resources_}, image_resources{image_resources_},
|
||||
sampler_resources{sampler_resources_} {}
|
||||
|
||||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
|
@ -84,6 +86,23 @@ public:
|
|||
return index;
|
||||
}
|
||||
|
||||
u32 Add(const ImageResource& desc) {
|
||||
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset && desc.type == existing.type &&
|
||||
desc.is_storage == existing.is_storage;
|
||||
})};
|
||||
return index;
|
||||
}
|
||||
|
||||
u32 Add(const SamplerResource& desc) {
|
||||
const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
})};
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Descriptors, typename Descriptor, typename Func>
|
||||
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
|
||||
|
@ -96,6 +115,8 @@ private:
|
|||
}
|
||||
|
||||
BufferResourceList& buffer_resources;
|
||||
ImageResourceList& image_resources;
|
||||
SamplerResourceList& sampler_resources;
|
||||
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
@ -118,8 +139,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
|
||||
// Retrieve SGPR that holds sbase
|
||||
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
|
||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetScalarRegister,
|
||||
"Nested resource loads not supported");
|
||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported");
|
||||
const IR::ScalarReg base = inst->Arg(0).ScalarReg();
|
||||
|
||||
// Return retrieved location.
|
||||
|
@ -140,7 +160,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
.stride = u32(buffer.stride),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferLoadType(inst),
|
||||
.is_storage = buffer.base_address % 64 != 0,
|
||||
.is_storage = /*buffer.base_address % 64 != 0*/ true,
|
||||
});
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
@ -151,6 +171,9 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
||||
}
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
// Calculate buffer address.
|
||||
const u32 dword_stride = buffer.stride / sizeof(u32);
|
||||
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
||||
|
@ -160,19 +183,79 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
} else if (inst_info.index_enable) {
|
||||
const IR::U32 index{inst.Arg(1)};
|
||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||
} else if (inst_info.offset_enable) {
|
||||
const IR::U32 offset{inst.Arg(1)};
|
||||
}
|
||||
inst.SetArg(1, address);
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(producer->Arg(0).InstRecursive());
|
||||
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
.dword_offset = tsharp.dword_offset,
|
||||
.type = image.type,
|
||||
.nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()),
|
||||
.is_storage = false,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
});
|
||||
|
||||
// Read sampler sharp.
|
||||
const auto ssharp = TrackSharp(producer->Arg(1).InstRecursive());
|
||||
const u32 sampler_binding = descriptors.Add(SamplerResource{
|
||||
.sgpr_base = ssharp.sgpr_base,
|
||||
.dword_offset = ssharp.dword_offset,
|
||||
});
|
||||
|
||||
// Patch image handle
|
||||
const u32 handle = image_binding | (sampler_binding << 16);
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(handle));
|
||||
|
||||
// Now that we know the image type, adjust texture coordinate vector.
|
||||
const IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||
switch (image.type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
return {body->Arg(0), body->Arg(1)};
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
case AmdGpu::ImageType::Color2D:
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
|
||||
case AmdGpu::ImageType::Color2DArray:
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst_info.has_lod_clamp) {
|
||||
// Final argument contains lod_clamp
|
||||
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
||||
inst.SetArg(arg_pos, arg);
|
||||
}
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers};
|
||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,13 +33,11 @@ union Mode {
|
|||
|
||||
union TextureInstInfo {
|
||||
u32 raw;
|
||||
BitField<0, 16, u32> descriptor_index;
|
||||
BitField<19, 1, u32> is_depth;
|
||||
BitField<20, 1, u32> has_bias;
|
||||
BitField<21, 1, u32> has_lod_clamp;
|
||||
BitField<22, 1, u32> relaxed_precision;
|
||||
BitField<23, 2, u32> gather_component;
|
||||
BitField<25, 2, u32> num_derivatives;
|
||||
BitField<0, 1, u32> is_depth;
|
||||
BitField<1, 1, u32> has_bias;
|
||||
BitField<2, 1, u32> has_lod_clamp;
|
||||
BitField<3, 1, u32> force_level0;
|
||||
BitField<4, 1, u32> explicit_lod;
|
||||
};
|
||||
|
||||
union BufferInstInfo {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue