video_core: Implement DMA. (#2819)

* Import memory

* 64K pages and fix memory mapping

* Queue coverage

* Buffer syncing, faulted readback adn BDA in Buffer

* Base DMA implementation

* Preparations for implementing SPV DMA access

* Base impl (pending 16K pages and getbuffersize)

* 16K pages and stack overflow fix

* clang-format

* clang-format but for real this time

* Try to fix macOS build

* Correct decltype

* Add testing log

* Fix stride and patch phi node blocks

* No need to check if it is a deleted buffer

* Clang format once more

* Offset in bytes

* Removed host buffers (may do it in another PR)

Also some random barrier fixes

* Add IR dumping from my read-const branch

* clang-format

* Correct size insteed of end

* Fix incorrect assert

* Possible fix for NieR deadlock

* Copy to avoid deadlock

* Use 2 mutexes insteed of copy

* Attempt to range sync error

* Revert "Attempt to range sync error"

This reverts commit dd287b48682b50f215680bb0956e39c2809bf3fe.

* Fix size truncated when syncing range

And memory barrier

* Some fixes (and async testing (doesn't work))

* Use compute to parse fault buffer

* Process faults on submit

* Only sync in the first time we see a readconst

Thsi is partialy wrong. We need to save the state into the submission context itself, not the rasterizer since we can yield and process another sumission (if im not understanding wrong).

* Use spec const and 32 bit atomic

* 32 bit counter

* Fix store_index

* Better sync (WIP, breaks PR now)

* Fixes for better sync

* Better sync

* Remove memory coveragte logic

* Point sirit to upstream

* Less waiting and barriers

* Correctly checkout moltenvk

* Bring back applying pending operations in wait

* Sync the whole buffer insteed of only the range

* Implement recursive shared/scoped locks

* Iterators

* Faster syncing with ranges

* Some alignment fixes

* fixed clang format

* Fix clang-format again

* Port page_manager from readbacks-poc

* clang-format

* Defer memory protect

* Remove RENDERER_TRACE

* Experiment: only sync on first readconst

* Added profiling (will be removed)

* Don't sync entire buffers

* Added logging for testing

* Updated temporary workaround to use 4k pages

* clang.-format

* Cleanup part 1

* Make ReadConst a SPIR-V function

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
Lander Gallastegi 2025-05-22 20:00:15 +02:00 committed by GitHub
parent 37887e8fde
commit f9bbde9c79
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 1641 additions and 311 deletions

View file

@ -154,6 +154,7 @@ void Traverse(EmitContext& ctx, const IR::Program& program) {
for (IR::Inst& inst : node.data.block->Instructions()) {
EmitInst(ctx, &inst);
}
ctx.first_to_last_label_map[label.value] = ctx.last_label;
break;
}
case IR::AbstractSyntaxNode::Type::If: {
@ -298,6 +299,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
if (info.dma_types != IR::Type::Void) {
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
ctx.AddExtension("SPV_KHR_physical_storage_buffer");
}
}
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
@ -387,7 +392,7 @@ void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo&
void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
auto inst{program.blocks.front()->begin()};
size_t block_index{0};
ctx.PatchDeferredPhi([&](size_t phi_arg) {
ctx.PatchDeferredPhi([&](u32 phi_arg, Id first_parent) {
if (phi_arg == 0) {
++inst;
if (inst == program.blocks[block_index]->end() ||
@ -398,7 +403,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
} while (inst->GetOpcode() != IR::Opcode::Phi);
}
}
return ctx.Def(inst->Arg(phi_arg));
const Id arg = ctx.Def(inst->Arg(phi_arg));
const Id parent = ctx.first_to_last_label_map[first_parent.value];
return std::make_pair(arg, parent);
});
}
} // Anonymous namespace

View file

@ -60,7 +60,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
@ -257,7 +257,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@ -265,7 +265,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);

View file

@ -161,26 +161,25 @@ void EmitGetGotoVariable(EmitContext&) {
UNREACHABLE_MSG("Unreachable instruction");
}
using BufferAlias = EmitContext::BufferAlias;
using PointerType = EmitContext::PointerType;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
const auto& srt_flatbuf = ctx.buffers.back();
ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
LOG_DEBUG(Render_Recompiler, "ReadConst from flatbuf dword {}", flatbuf_off_dw);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
const Id ptr{
ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
return ctx.OpLoad(ctx.U32[1], ptr);
// We can only provide a fallback for immediate offsets.
if (flatbuf_off_dw == 0) {
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
} else {
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset,
ctx.ConstU32(flatbuf_off_dw));
}
}
template <BufferAlias alias>
template <PointerType type>
Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const auto [id, pointer_type] = buffer[alias];
const auto value_type = alias == BufferAlias::U32 ? ctx.U32[1] : ctx.F32[1];
const auto [id, pointer_type] = buffer[type];
const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(value_type, ptr)};
@ -192,7 +191,7 @@ Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
}
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
return ReadConstBuffer<BufferAlias::U32>(ctx, handle, index);
return ReadConstBuffer<PointerType::U32>(ctx, handle, index);
}
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
@ -251,7 +250,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return ReadConstBuffer<BufferAlias::F32>(ctx, param.buffer_handle, offset);
return ReadConstBuffer<PointerType::F32>(ctx, param.buffer_handle, offset);
}
Id result;
@ -437,7 +436,7 @@ static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size,
return result;
}
template <u32 N, BufferAlias alias>
template <u32 N, PointerType alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
@ -445,7 +444,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
boost::container::static_vector<Id, N> ids;
@ -456,7 +455,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
if (!flags.typed) {
// Untyped loads have bounds checking per-component.
ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
result_i, alias == BufferAlias::F32));
result_i, alias == PointerType::F32));
} else {
ids.push_back(result_i);
}
@ -466,7 +465,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
if (flags.typed) {
// Typed loads have single bounds check for the whole load.
return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
alias == BufferAlias::F32);
alias == PointerType::F32);
}
return result;
}
@ -476,7 +475,7 @@ Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
@ -487,7 +486,7 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
@ -495,35 +494,35 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
}
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -553,7 +552,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto
emit_func();
}
template <u32 N, BufferAlias alias>
template <u32 N, PointerType alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
@ -562,7 +561,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
auto store = [&] {
@ -593,7 +592,7 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U8, value)};
EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
@ -604,7 +603,7 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U16, value)};
@ -613,35 +612,35 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
}
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View file

@ -61,7 +61,7 @@ void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);

View file

@ -7,6 +7,7 @@
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include <boost/container/static_vector.hpp>
#include <fmt/format.h>
@ -70,6 +71,12 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
if (info.dma_types != IR::Type::Void) {
SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
} else {
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
}
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
@ -137,9 +144,13 @@ void EmitContext::DefineArithmeticTypes() {
true_value = ConstantTrue(U1[1]);
false_value = ConstantFalse(U1[1]);
u8_one_value = Constant(U8, 1U);
u8_zero_value = Constant(U8, 0U);
u32_one_value = ConstU32(1U);
u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f);
u64_one_value = Constant(U64, 1ULL);
u64_zero_value = Constant(U64, 0ULL);
pi_x2 = ConstF32(2.0f * float{std::numbers::pi});
@ -157,6 +168,35 @@ void EmitContext::DefineArithmeticTypes() {
if (info.uses_fp64) {
frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
}
if (True(info.dma_types & IR::Type::F64)) {
physical_pointer_types[PointerType::F64] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
}
if (True(info.dma_types & IR::Type::U64)) {
physical_pointer_types[PointerType::U64] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
}
if (True(info.dma_types & IR::Type::F32)) {
physical_pointer_types[PointerType::F32] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
}
if (True(info.dma_types & IR::Type::U32)) {
physical_pointer_types[PointerType::U32] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
}
if (True(info.dma_types & IR::Type::F16)) {
physical_pointer_types[PointerType::F16] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
}
if (True(info.dma_types & IR::Type::U16)) {
physical_pointer_types[PointerType::U16] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
}
if (True(info.dma_types & IR::Type::U8)) {
physical_pointer_types[PointerType::U8] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
}
}
void EmitContext::DefineInterfaces() {
@ -195,9 +235,10 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
}
Id EmitContext::GetBufferSize(const u32 sharp_idx) {
const auto& srt_flatbuf = buffers.back();
ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
// Can this be done with memory access? Like we do now with ReadConst
const auto& srt_flatbuf = buffers[flatbuf_index];
ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@ -690,8 +731,14 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
case Shader::BufferType::GdsBuffer:
Name(id, "gds_buffer");
break;
case Shader::BufferType::ReadConstUbo:
Name(id, "srt_flatbuf_ubo");
case Shader::BufferType::Flatbuf:
Name(id, "srt_flatbuf");
break;
case Shader::BufferType::BdaPagetable:
Name(id, "bda_pagetable");
break;
case Shader::BufferType::FaultBuffer:
Name(id, "fault_buffer");
break;
case Shader::BufferType::SharedMemory:
Name(id, "ssbo_shmem");
@ -705,35 +752,53 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
};
void EmitContext::DefineBuffers() {
if (!profile.supports_robust_buffer_access && !info.has_readconst) {
// In case ReadConstUbo has not already been bound by IR and is needed
if (!profile.supports_robust_buffer_access &&
info.readconst_types == Info::ReadConstType::None) {
// In case Flatbuf has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::ReadConstUbo,
// We can't guarantee that flatbuf will not grow past UBO
// limit if there are a lot of ReadConsts. (We could specialize)
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
.buffer_type = BufferType::Flatbuf,
});
// In the future we may want to read buffer sizes from GPU memory if available.
// info.readconst_types |= Info::ReadConstType::Immediate;
}
for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile);
// Set indexes for special buffers.
if (desc.buffer_type == BufferType::Flatbuf) {
flatbuf_index = buffers.size();
} else if (desc.buffer_type == BufferType::BdaPagetable) {
bda_pagetable_index = buffers.size();
} else if (desc.buffer_type == BufferType::FaultBuffer) {
fault_buffer_index = buffers.size();
}
// Define aliases depending on the shader usage.
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
if (True(desc.used_types & IR::Type::U64)) {
spv_buffer[PointerType::U64] =
DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
}
if (True(desc.used_types & IR::Type::U32)) {
spv_buffer[BufferAlias::U32] =
spv_buffer[PointerType::U32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
}
if (True(desc.used_types & IR::Type::F32)) {
spv_buffer[BufferAlias::F32] =
spv_buffer[PointerType::F32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
}
if (True(desc.used_types & IR::Type::U16)) {
spv_buffer[BufferAlias::U16] =
spv_buffer[PointerType::U16] =
DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
}
if (True(desc.used_types & IR::Type::U8)) {
spv_buffer[BufferAlias::U8] =
spv_buffer[PointerType::U8] =
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
}
++binding.unified;
@ -1003,6 +1068,101 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_vie
return func;
}
Id EmitContext::DefineGetBdaPointer() {
const auto caching_pagebits{
Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS))};
const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};
const auto func_type{TypeFunction(U64, U64)};
const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
const auto address{OpFunctionParameter(U64)};
Name(func, "get_bda_pointer");
AddLabel();
const auto fault_label{OpLabel()};
const auto available_label{OpLabel()};
const auto merge_label{OpLabel()};
// Get page BDA
const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
const auto page32{OpUConvert(U32[1], page)};
const auto& bda_buffer{buffers[bda_pagetable_index]};
const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
const auto bda{OpLoad(U64, bda_ptr)};
// Check if page is GPU cached
const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
OpBranchConditional(is_fault, fault_label, available_label);
// First time acces, mark as fault
AddLabel(fault_label);
const auto& fault_buffer{buffers[fault_buffer_index]};
const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
const auto fault_ptr{
OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)};
const auto fault_value{OpLoad(U8, fault_ptr)};
const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)};
OpStore(fault_ptr, fault_value_masked);
// Return null pointer
const auto fallback_result{u64_zero_value};
OpBranch(merge_label);
// Value is available, compute address
AddLabel(available_label);
const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
const auto addr{OpIAdd(U64, bda, offset_in_bda)};
OpBranch(merge_label);
// Merge
AddLabel(merge_label);
const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
OpReturnValue(result);
OpFunctionEnd();
return func;
}
Id EmitContext::DefineReadConst(bool dynamic) {
const auto func_type{!dynamic ? TypeFunction(U32[1], U32[2], U32[1], U32[1])
: TypeFunction(U32[1], U32[2], U32[1])};
const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
const auto base{OpFunctionParameter(U32[2])};
const auto offset{OpFunctionParameter(U32[1])};
const auto flatbuf_offset{!dynamic ? OpFunctionParameter(U32[1]) : Id{}};
Name(func, dynamic ? "read_const_dynamic" : "read_const");
AddLabel();
const auto base_lo{OpUConvert(U64, OpCompositeExtract(U32[1], base, 0))};
const auto base_hi{OpUConvert(U64, OpCompositeExtract(U32[1], base, 1))};
const auto base_shift{OpShiftLeftLogical(U64, base_hi, ConstU32(32U))};
const auto base_addr{OpBitwiseOr(U64, base_lo, base_shift)};
const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};
const auto result = EmitMemoryRead(U32[1], addr, [&]() {
if (dynamic) {
return u32_zero_value;
} else {
const auto& flatbuf_buffer{buffers[flatbuf_index]};
ASSERT(flatbuf_buffer.binding >= 0 &&
flatbuf_buffer.buffer_type == BufferType::Flatbuf);
const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
flatbuf_offset)};
return OpLoad(U32[1], ptr);
}
});
OpReturnValue(result);
OpFunctionEnd();
return func;
}
void EmitContext::DefineFunctions() {
if (info.uses_pack_10_11_11) {
f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
@ -1012,6 +1172,18 @@ void EmitContext::DefineFunctions() {
uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
}
if (info.dma_types != IR::Type::Void) {
get_bda_pointer = DefineGetBdaPointer();
}
if (True(info.readconst_types & Info::ReadConstType::Immediate)) {
LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses immediate ReadConst", info.pgm_hash);
read_const = DefineReadConst(false);
}
if (True(info.readconst_types & Info::ReadConstType::Dynamic)) {
LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses dynamic ReadConst", info.pgm_hash);
read_const_dynamic = DefineReadConst(true);
}
}
} // namespace Shader::Backend::SPIRV

View file

@ -4,6 +4,7 @@
#pragma once
#include <array>
#include <unordered_map>
#include <sirit/sirit.h>
#include "shader_recompiler/backend/bindings.h"
@ -41,6 +42,17 @@ public:
Bindings& binding);
~EmitContext();
enum class PointerType : u32 {
U8,
U16,
F16,
U32,
F32,
U64,
F64,
NumAlias,
};
Id Def(const IR::Value& value);
void DefineBufferProperties();
@ -133,12 +145,72 @@ public:
return ConstantComposite(type, constituents);
}
inline Id AddLabel() {
last_label = Module::AddLabel();
return last_label;
}
inline Id AddLabel(Id label) {
last_label = Module::AddLabel(label);
return last_label;
}
PointerType PointerTypeFromType(Id type) {
if (type.value == U8.value)
return PointerType::U8;
if (type.value == U16.value)
return PointerType::U16;
if (type.value == F16[1].value)
return PointerType::F16;
if (type.value == U32[1].value)
return PointerType::U32;
if (type.value == F32[1].value)
return PointerType::F32;
if (type.value == U64.value)
return PointerType::U64;
if (type.value == F64[1].value)
return PointerType::F64;
UNREACHABLE_MSG("Unknown type for pointer");
}
Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
const Id available_label = OpLabel();
const Id fallback_label = OpLabel();
const Id merge_label = OpLabel();
const Id addr = OpFunctionCall(U64, get_bda_pointer, address);
const Id is_available = OpINotEqual(U1[1], addr, u64_zero_value);
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
OpBranchConditional(is_available, available_label, fallback_label);
// Available
AddLabel(available_label);
const auto pointer_type = PointerTypeFromType(type);
const Id pointer_type_id = physical_pointer_types[pointer_type];
const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
OpBranch(merge_label);
// Fallback
AddLabel(fallback_label);
const Id fallback_result = fallback();
OpBranch(merge_label);
// Merge
AddLabel(merge_label);
const Id final_result =
OpPhi(type, fallback_result, fallback_label, result, available_label);
return final_result;
}
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage;
LogicalStage l_stage{};
Id last_label{};
Id void_id{};
Id U8{};
Id S8{};
@ -161,9 +233,13 @@ public:
Id true_value{};
Id false_value{};
Id u8_one_value{};
Id u8_zero_value{};
Id u32_one_value{};
Id u32_zero_value{};
Id f32_zero_value{};
Id u64_one_value{};
Id u64_zero_value{};
Id shared_u8{};
Id shared_u16{};
@ -231,14 +307,6 @@ public:
bool is_storage = false;
};
enum class BufferAlias : u32 {
U8,
U16,
U32,
F32,
NumAlias,
};
struct BufferSpv {
Id id;
Id pointer_type;
@ -252,22 +320,40 @@ public:
Id size;
Id size_shorts;
Id size_dwords;
std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
const BufferSpv& operator[](BufferAlias alias) const {
const BufferSpv& operator[](PointerType alias) const {
return aliases[u32(alias)];
}
BufferSpv& operator[](BufferAlias alias) {
BufferSpv& operator[](PointerType alias) {
return aliases[u32(alias)];
}
};
struct PhysicalPointerTypes {
std::array<Id, u32(PointerType::NumAlias)> types;
const Id& operator[](PointerType type) const {
return types[u32(type)];
}
Id& operator[](PointerType type) {
return types[u32(type)];
}
};
Bindings& binding;
boost::container::small_vector<Id, 16> buf_type_ids;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers;
PhysicalPointerTypes physical_pointer_types;
std::unordered_map<u32, Id> first_to_last_label_map;
size_t flatbuf_index{};
size_t bda_pagetable_index{};
size_t fault_buffer_index{};
Id sampler_type{};
Id sampler_pointer_type{};
@ -292,6 +378,11 @@ public:
Id uf10_to_f32{};
Id f32_to_uf10{};
Id get_bda_pointer{};
Id read_const{};
Id read_const_dynamic{};
private:
void DefineArithmeticTypes();
void DefineInterfaces();
@ -312,6 +403,10 @@ private:
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
Id DefineGetBdaPointer();
Id DefineReadConst(bool dynamic);
Id GetBufferSize(u32 sharp_idx);
};

View file

@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const u32 dword_offset = [&] -> u32 {
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 {
if (smrd.imm) {
return smrd.offset;
return ir.Imm32(smrd.offset);
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
return ir.Imm32(inst.src[1].code);
}
UNREACHABLE();
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, index));
}
}

View file

@ -41,7 +41,9 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class BufferType : u32 {
Guest,
ReadConstUbo,
Flatbuf,
BdaPagetable,
FaultBuffer,
GdsBuffer,
SharedMemory,
};
@ -215,11 +217,18 @@ struct Info {
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{};
bool has_readconst{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
enum class ReadConstType {
None = 0,
Immediate = 1 << 0,
Dynamic = 1 << 1,
};
ReadConstType readconst_types{};
IR::Type dma_types{IR::Type::Void};
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
user_data{params.user_data} {}
@ -277,6 +286,7 @@ struct Info {
sizeof(tess_constants));
}
};
DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
return inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);

View file

@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "abstract_syntax_list.h"
namespace Shader::IR {
std::string DumpASLNode(const AbstractSyntaxNode& node,
const std::map<const Block*, size_t>& block_to_index,
const std::map<const Inst*, size_t>& inst_to_index) {
switch (node.type) {
case AbstractSyntaxNode::Type::Block:
return fmt::format("Block: ${}", block_to_index.at(node.data.block));
case AbstractSyntaxNode::Type::If:
return fmt::format("If: cond = %{}, body = ${}, merge = ${}",
inst_to_index.at(node.data.if_node.cond.Inst()),
block_to_index.at(node.data.if_node.body),
block_to_index.at(node.data.if_node.merge));
case AbstractSyntaxNode::Type::EndIf:
return fmt::format("EndIf: merge = ${}", block_to_index.at(node.data.end_if.merge));
case AbstractSyntaxNode::Type::Loop:
return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}",
block_to_index.at(node.data.loop.body),
block_to_index.at(node.data.loop.continue_block),
block_to_index.at(node.data.loop.merge));
case AbstractSyntaxNode::Type::Repeat:
return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}",
inst_to_index.at(node.data.repeat.cond.Inst()),
block_to_index.at(node.data.repeat.loop_header),
block_to_index.at(node.data.repeat.merge));
case AbstractSyntaxNode::Type::Break:
return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}",
inst_to_index.at(node.data.break_node.cond.Inst()),
block_to_index.at(node.data.break_node.merge),
block_to_index.at(node.data.break_node.skip));
case AbstractSyntaxNode::Type::Return:
return "Return";
case AbstractSyntaxNode::Type::Unreachable:
return "Unreachable";
};
UNREACHABLE();
}
} // namespace Shader::IR

View file

@ -3,6 +3,7 @@
#pragma once
#include <map>
#include <vector>
#include "shader_recompiler/ir/value.h"
@ -53,4 +54,8 @@ struct AbstractSyntaxNode {
};
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
std::string DumpASLNode(const AbstractSyntaxNode& node,
const std::map<const Block*, size_t>& block_to_index,
const std::map<const Inst*, size_t>& inst_to_index);
} // namespace Shader::IR

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
#include "video_core/buffer_cache/buffer_cache.h"
namespace Shader::Optimization {
@ -79,14 +80,21 @@ void Visit(Info& info, const IR::Inst& inst) {
info.uses_lane_id = true;
break;
case IR::Opcode::ReadConst:
if (!info.has_readconst) {
if (info.readconst_types == Info::ReadConstType::None) {
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::ReadConstUbo,
// We can't guarantee that flatbuf will not grow past UBO
// limit if there are a lot of ReadConsts. (We could specialize)
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
.buffer_type = BufferType::Flatbuf,
});
info.has_readconst = true;
}
if (inst.Flags<u32>() != 0) {
info.readconst_types |= Info::ReadConstType::Immediate;
} else {
info.readconst_types |= Info::ReadConstType::Dynamic;
}
info.dma_types |= IR::Type::U32;
break;
case IR::Opcode::PackUfloat10_11_11:
info.uses_pack_10_11_11 = true;
@ -105,6 +113,21 @@ void CollectShaderInfoPass(IR::Program& program) {
Visit(program.info, inst);
}
}
if (program.info.dma_types != IR::Type::Void) {
program.info.buffers.push_back({
.used_types = IR::Type::U64,
.inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::BDA_PAGETABLE_SIZE),
.buffer_type = BufferType::BdaPagetable,
.is_written = true,
});
program.info.buffers.push_back({
.used_types = IR::Type::U8,
.inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
.buffer_type = BufferType::FaultBuffer,
.is_written = true,
});
}
}
} // namespace Shader::Optimization

View file

@ -6,13 +6,30 @@
#include <fmt/format.h>
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
std::string DumpProgram(const Program& program) {
void DumpProgram(const Program& program, const Info& info, const std::string& type) {
using namespace Common::FS;
if (!Config::dumpShaders()) {
return;
}
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto ir_filename =
fmt::format("{}_{:#018x}.{}irprogram.txt", info.stage, info.pgm_hash, type);
const auto ir_file = IOFile{dump_dir / ir_filename, FileAccessMode::Write, FileType::TextFile};
size_t index{0};
std::map<const IR::Inst*, size_t> inst_to_index;
std::map<const IR::Block*, size_t> block_to_index;
@ -21,11 +38,20 @@ std::string DumpProgram(const Program& program) {
block_to_index.emplace(block, index);
++index;
}
std::string ret;
for (const auto& block : program.blocks) {
ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
std::string s = IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
ir_file.WriteString(s);
}
const auto asl_filename = fmt::format("{}_{:#018x}.{}asl.txt", info.stage, info.pgm_hash, type);
const auto asl_file =
IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile};
for (const auto& node : program.syntax_list) {
std::string s = IR::DumpASLNode(node, block_to_index, inst_to_index) + '\n';
asl_file.WriteString(s);
}
return ret;
}
} // namespace Shader::IR

View file

@ -21,6 +21,6 @@ struct Program {
Info& info;
};
[[nodiscard]] std::string DumpProgram(const Program& program);
void DumpProgram(const Program& program, const Info& info, const std::string& type = "");
} // namespace Shader::IR

View file

@ -85,6 +85,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::CollectShaderInfoPass(program);
Shader::IR::DumpProgram(program, info);
return program;
}