mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-19 09:54:54 +00:00
video_core: Implement basic compute shaders and more instructions
This commit is contained in:
parent
10bceb1643
commit
58de7ff55a
58 changed files with 1234 additions and 293 deletions
|
@ -173,10 +173,10 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
spv::ExecutionModel execution_model{};
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
// const std::array<u32, 3> workgroup_size{program.workgroup_size};
|
||||
// execution_model = spv::ExecutionModel::GLCompute;
|
||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||
// workgroup_size[1], workgroup_size[2]);
|
||||
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
|
||||
execution_model = spv::ExecutionModel::GLCompute;
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||
workgroup_size[1], workgroup_size[2]);
|
||||
break;
|
||||
}
|
||||
case Stage::Vertex:
|
||||
|
@ -189,6 +189,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
} else {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
||||
// if (program.info.stores_frag_depth) {
|
||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
// }
|
||||
|
@ -249,7 +250,11 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
|
|||
}
|
||||
|
||||
Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
|
||||
throw NotImplementedException("Forward identity declaration");
|
||||
const Id id{ctx.Def(value)};
|
||||
if (!Sirit::ValidId(id)) {
|
||||
throw NotImplementedException("Forward identity declaration");
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
void EmitReference(EmitContext&) {}
|
||||
|
@ -258,23 +263,11 @@ void EmitPhiMove(EmitContext&) {
|
|||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetZeroFromOp(EmitContext&) {
|
||||
void EmitGetScc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetSignFromOp(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetCarryFromOp(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetOverflowFromOp(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVcc(EmitContext& ctx) {
|
||||
void EmitGetExec(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
|
@ -282,4 +275,24 @@ void EmitGetVcc(EmitContext& ctx) {
|
|||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetVccLo(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetScc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetExec(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVcc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVccLo(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -29,8 +29,8 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void EmitGetUserData(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
|
||||
return ctx.ConstU32(ctx.info.user_data[static_cast<size_t>(reg)]);
|
||||
}
|
||||
|
||||
void EmitGetScalarRegister(EmitContext&) {
|
||||
|
@ -62,10 +62,13 @@ Id EmitReadConst(EmitContext& ctx) {
|
|||
}
|
||||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
const Id buffer = ctx.buffers[handle];
|
||||
const Id type = ctx.info.buffers[handle].is_storage ? ctx.storage_f32 : ctx.uniform_f32;
|
||||
const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)};
|
||||
return ctx.OpLoad(ctx.F32[1], ptr);
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
}
|
||||
|
||||
Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
|
||||
return EmitReadConstBuffer(ctx, handle, index);
|
||||
}
|
||||
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||
|
@ -76,8 +79,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
// Attribute is disabled or varying component is not written
|
||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||
}
|
||||
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||
return ctx.OpLoad(param.component_type, pointer);
|
||||
if (param.num_components > 1) {
|
||||
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||
return ctx.OpLoad(param.component_type, pointer);
|
||||
} else {
|
||||
return ctx.OpLoad(param.component_type, param.id);
|
||||
}
|
||||
}
|
||||
throw NotImplementedException("Read attribute {}", attr);
|
||||
}
|
||||
|
@ -86,6 +93,11 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
switch (attr) {
|
||||
case IR::Attribute::VertexId:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
|
||||
case IR::Attribute::WorkgroupId:
|
||||
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
|
||||
case IR::Attribute::LocalInvocationId:
|
||||
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id),
|
||||
comp);
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
}
|
||||
|
@ -97,9 +109,22 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
|||
}
|
||||
|
||||
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (info.index_enable && info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (info.index_enable) {
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, address)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return EmitLoadBufferF32(ctx, inst, handle, address);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -110,18 +135,48 @@ Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
|
|||
|
||||
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||
const Id buffer = ctx.buffers[handle];
|
||||
const Id type = ctx.info.buffers[handle].is_storage ? ctx.storage_f32 : ctx.uniform_f32;
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (info.index_enable && info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (info.index_enable) {
|
||||
boost::container::static_vector<Id, 4> ids;
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||
const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)};
|
||||
ids.push_back(ctx.OpLoad(ctx.F32[1], ptr));
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
|
||||
}
|
||||
return ctx.OpCompositeConstruct(ctx.F32[4], ids);
|
||||
return ctx.OpCompositeConstruct(buffer.data_types->Get(4), ids);
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (info.index_enable && info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (info.index_enable) {
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, address)};
|
||||
ctx.OpStore(ptr, value);
|
||||
return;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
|
|
@ -30,6 +30,10 @@ Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
|||
return ctx.OpFAdd(ctx.F64[1], a, b);
|
||||
}
|
||||
|
||||
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFSub(ctx.F32[1], a, b);
|
||||
}
|
||||
|
||||
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
|
||||
return ctx.OpFma(ctx.F16[1], a, b, c);
|
||||
}
|
||||
|
@ -196,6 +200,10 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
|
|||
return ctx.OpTrunc(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
Id EmitFPFract(EmitContext& ctx, Id value) {
|
||||
return ctx.OpFract(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
|
||||
return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
namespace Shader::IR {
|
||||
enum class Attribute : u64;
|
||||
enum class Patch : u64;
|
||||
enum class ScalarReg : u32;
|
||||
class Inst;
|
||||
class Value;
|
||||
} // namespace Shader::IR
|
||||
|
@ -30,11 +30,18 @@ void EmitJoin(EmitContext& ctx);
|
|||
void EmitBarrier(EmitContext& ctx);
|
||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
||||
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
||||
void EmitGetScc(EmitContext& ctx);
|
||||
void EmitGetExec(EmitContext& ctx);
|
||||
void EmitGetVcc(EmitContext& ctx);
|
||||
void EmitGetVccLo(EmitContext& ctx);
|
||||
void EmitSetScc(EmitContext& ctx);
|
||||
void EmitSetExec(EmitContext& ctx);
|
||||
void EmitSetVcc(EmitContext& ctx);
|
||||
void EmitSetVccLo(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
void EmitEpilogue(EmitContext& ctx);
|
||||
void EmitGetUserData(EmitContext& ctx);
|
||||
void EmitDiscard(EmitContext& ctx);
|
||||
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
|
||||
void EmitGetScalarRegister(EmitContext& ctx);
|
||||
void EmitSetScalarRegister(EmitContext& ctx);
|
||||
void EmitGetVectorRegister(EmitContext& ctx);
|
||||
|
@ -44,10 +51,17 @@ void EmitGetGotoVariable(EmitContext& ctx);
|
|||
void EmitSetScc(EmitContext& ctx);
|
||||
Id EmitReadConst(EmitContext& ctx);
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
|
||||
Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index);
|
||||
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
|
@ -137,6 +151,7 @@ Id EmitFPAbs64(EmitContext& ctx, Id value);
|
|||
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
|
||||
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
|
||||
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
|
||||
|
@ -177,6 +192,7 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
|
|||
Id EmitFPTrunc16(EmitContext& ctx, Id value);
|
||||
Id EmitFPTrunc32(EmitContext& ctx, Id value);
|
||||
Id EmitFPTrunc64(EmitContext& ctx, Id value);
|
||||
Id EmitFPFract(EmitContext& ctx, Id value);
|
||||
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
|
||||
|
|
|
@ -10,6 +10,10 @@ void EmitPrologue(EmitContext& ctx) {}
|
|||
|
||||
void EmitEpilogue(EmitContext& ctx) {}
|
||||
|
||||
void EmitDiscard(EmitContext& ctx) {
|
||||
ctx.OpDemoteToHelperInvocationEXT();
|
||||
}
|
||||
|
||||
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
|
||||
throw NotImplementedException("Geometry streams");
|
||||
}
|
||||
|
|
|
@ -194,6 +194,12 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
input_params[input.semantic] = {id, input_f32, F32[1], num_components};
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
case Stage::Compute:
|
||||
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
||||
local_invocation_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -233,10 +239,11 @@ void EmitContext::DefineOutputs(const Info& info) {
|
|||
|
||||
void EmitContext::DefineBuffers(const Info& info) {
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
ASSERT(True(buffer.used_types & IR::Type::F32));
|
||||
ASSERT(buffer.stride % sizeof(float) == 0);
|
||||
const u32 num_elements = buffer.stride * buffer.num_records / sizeof(float);
|
||||
const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))};
|
||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
const u32 stride = buffer.stride == 0 ? 1 : buffer.stride;
|
||||
const u32 num_elements = stride * buffer.num_records;
|
||||
const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))};
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
|
||||
|
@ -249,18 +256,18 @@ void EmitContext::DefineBuffers(const Info& info) {
|
|||
const auto storage_class =
|
||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||
if (buffer.is_storage) {
|
||||
storage_f32 = TypePointer(storage_class, F32[1]);
|
||||
} else {
|
||||
uniform_f32 = TypePointer(storage_class, F32[1]);
|
||||
}
|
||||
const Id pointer_type = TypePointer(storage_class, data_type);
|
||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("c{}", i));
|
||||
Name(id, fmt::format("{}{}", buffer.is_storage ? "ssbo" : "cbuf", i));
|
||||
|
||||
binding++;
|
||||
buffers.push_back(id);
|
||||
buffers.push_back({
|
||||
.id = id,
|
||||
.data_types = data_types,
|
||||
.pointer_type = pointer_type,
|
||||
});
|
||||
interfaces.push_back(id);
|
||||
i++;
|
||||
}
|
||||
|
|
|
@ -23,6 +23,14 @@ struct VectorIds {
|
|||
return ids[index - 1];
|
||||
}
|
||||
|
||||
[[nodiscard]] Id& Get(u32 index) {
|
||||
return ids[index - 1];
|
||||
}
|
||||
|
||||
[[nodiscard]] const Id& Get(u32 index) const {
|
||||
return ids[index - 1];
|
||||
}
|
||||
|
||||
std::array<Id, 4> ids;
|
||||
};
|
||||
|
||||
|
@ -141,9 +149,6 @@ public:
|
|||
Id output_u32{};
|
||||
Id output_f32{};
|
||||
|
||||
Id uniform_f32{};
|
||||
Id storage_f32{};
|
||||
|
||||
boost::container::small_vector<Id, 16> interfaces;
|
||||
|
||||
Id output_position{};
|
||||
|
@ -151,6 +156,9 @@ public:
|
|||
Id base_vertex{};
|
||||
std::array<Id, 8> frag_color{};
|
||||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
|
||||
struct TextureDefinition {
|
||||
Id id;
|
||||
Id sampled_type;
|
||||
|
@ -158,8 +166,14 @@ public:
|
|||
Id image_type;
|
||||
};
|
||||
|
||||
struct BufferDefinition {
|
||||
Id id;
|
||||
const VectorIds* data_types;
|
||||
Id pointer_type;
|
||||
};
|
||||
|
||||
u32& binding;
|
||||
boost::container::small_vector<Id, 4> buffers;
|
||||
boost::container::small_vector<BufferDefinition, 4> buffers;
|
||||
boost::container::small_vector<TextureDefinition, 4> images;
|
||||
boost::container::small_vector<Id, 4> samplers;
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
|
|||
|
||||
CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
||||
: block_pool{block_pool_}, inst_list{inst_list_} {
|
||||
index_to_pc.resize(inst_list.size());
|
||||
index_to_pc.resize(inst_list.size() + 1);
|
||||
EmitLabels();
|
||||
EmitBlocks();
|
||||
LinkBlocks();
|
||||
|
@ -78,6 +78,7 @@ void CFG::EmitLabels() {
|
|||
}
|
||||
pc += inst.length;
|
||||
}
|
||||
index_to_pc[inst_list.size()] = pc;
|
||||
|
||||
// Sort labels to make sure block insertion is correct.
|
||||
std::ranges::sort(labels);
|
||||
|
@ -90,7 +91,7 @@ void CFG::EmitBlocks() {
|
|||
}
|
||||
const auto it_index = std::ranges::lower_bound(index_to_pc, label);
|
||||
ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back());
|
||||
return std::distance(index_to_pc.begin(), std::prev(it_index));
|
||||
return std::distance(index_to_pc.begin(), it_index);
|
||||
};
|
||||
|
||||
for (auto it = labels.begin(); it != labels.end(); it++) {
|
||||
|
@ -102,7 +103,7 @@ void CFG::EmitBlocks() {
|
|||
return;
|
||||
}
|
||||
const Label end = *next_it;
|
||||
const size_t end_index = get_index(end);
|
||||
const size_t end_index = get_index(end) - 1;
|
||||
const auto& end_inst = inst_list[end_index];
|
||||
|
||||
// Insert block between the labels using the last instruction
|
||||
|
@ -146,9 +147,15 @@ void CFG::LinkBlocks() {
|
|||
block.branch_true = get_block(target_pc);
|
||||
block.branch_false = get_block(block.end);
|
||||
block.end_class = EndClass::Branch;
|
||||
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
||||
const auto& prev_inst = inst_list[block.end_index - 1];
|
||||
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
|
||||
block.end_class = EndClass::Kill;
|
||||
} else {
|
||||
block.end_class = EndClass::Exit;
|
||||
}
|
||||
} else {
|
||||
// Exit blocks don't link to anything.
|
||||
block.end_class = EndClass::Exit;
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -187,12 +194,12 @@ std::string CFG::Dot() const {
|
|||
fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
|
||||
++node_uid;
|
||||
break;
|
||||
// case EndClass::Kill:
|
||||
// dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
||||
// dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
|
||||
// node_uid);
|
||||
// ++node_uid;
|
||||
// break;
|
||||
case EndClass::Kill:
|
||||
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
||||
dot +=
|
||||
fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid);
|
||||
++node_uid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dot += "\t\tlabel = \"main\";\n\t}\n";
|
||||
|
|
|
@ -21,6 +21,7 @@ using Hook =
|
|||
enum class EndClass {
|
||||
Branch, ///< Block ends with a (un)conditional branch.
|
||||
Exit, ///< Block ends with an exit instruction.
|
||||
Kill, ///< Block ends with a discard instruction.
|
||||
};
|
||||
|
||||
/// A block represents a linear range of instructions.
|
||||
|
|
|
@ -684,7 +684,7 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
|
|||
outputMod.clamp = static_cast<bool>(control.clmp);
|
||||
switch (control.omod) {
|
||||
case 0:
|
||||
outputMod.multiplier = std::numeric_limits<float>::quiet_NaN();
|
||||
outputMod.multiplier = 0.f;
|
||||
break;
|
||||
case 1:
|
||||
outputMod.multiplier = 2.0f;
|
||||
|
|
|
@ -33,7 +33,7 @@ struct InputModifiers {
|
|||
/// These are applied before storing an operand register.
|
||||
struct OutputModifiers {
|
||||
bool clamp = false;
|
||||
float multiplier = std::numeric_limits<float>::quiet_NaN();
|
||||
float multiplier = 0.f;
|
||||
};
|
||||
|
||||
struct InstOperand {
|
||||
|
|
|
@ -409,9 +409,9 @@ private:
|
|||
case EndClass::Exit:
|
||||
root.insert(ip, *pool.Create(Return{}, &root_stmt));
|
||||
break;
|
||||
// case EndClass::Kill:
|
||||
// root.insert(ip, *pool.Create(Kill{}, &root_stmt));
|
||||
// break;
|
||||
case EndClass::Kill:
|
||||
root.insert(ip, *pool.Create(Kill{}, &root_stmt));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -606,8 +606,7 @@ public:
|
|||
Visit(root_stmt, nullptr, nullptr);
|
||||
|
||||
IR::Block& first_block{*syntax_list.front().data.block};
|
||||
IR::IREmitter ir(first_block, first_block.begin());
|
||||
ir.Prologue();
|
||||
Translator{&first_block, info}.EmitPrologue();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -767,7 +766,7 @@ private:
|
|||
case StatementType::Kill: {
|
||||
ensure_block();
|
||||
IR::Block* demote_block{MergeBlock(parent, stmt)};
|
||||
// IR::IREmitter{*current_block}.DemoteToHelperInvocation();
|
||||
IR::IREmitter{*current_block}.Discard();
|
||||
current_block->AddBranch(demote_block);
|
||||
current_block = demote_block;
|
||||
|
||||
|
|
|
@ -30,9 +30,16 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
|||
return ir.ILessThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::LE:
|
||||
return ir.ILessThanEqual(lhs, rhs, is_signed);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
// ir.SetScc(result);
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_ANDN2_B64(const GcnInst& inst) {
|
||||
// TODO: Actually implement this.
|
||||
ir.SetScc(ir.GetVcc());
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -34,13 +34,11 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
|||
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
const auto& smrd = inst.control.smrd;
|
||||
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||
const IR::U32 offset =
|
||||
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
||||
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
||||
ir.Imm32(2))};
|
||||
const IR::U32 dword_offset =
|
||||
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
|
||||
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
Load(ir, num_dwords, vsharp, dst_reg, offset);
|
||||
Load(ir, num_dwords, vsharp, dst_reg, dword_offset);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -9,7 +9,18 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} {
|
||||
Translator::Translator(IR::Block* block_, Info& info_)
|
||||
: ir{*block_, block_->begin()}, info{info_} {}
|
||||
|
||||
void Translator::EmitPrologue() {
|
||||
ir.Prologue();
|
||||
|
||||
// Initialize user data.
|
||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||
for (u32 i = 0; i < info.num_user_data; i++) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetUserData(dst_sreg));
|
||||
}
|
||||
|
||||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||
switch (info.stage) {
|
||||
case Stage::Vertex:
|
||||
|
@ -29,69 +40,108 @@ Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*bloc
|
|||
}
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace));
|
||||
break;
|
||||
case Stage::Compute:
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Unknown shader stage");
|
||||
}
|
||||
|
||||
// Initialize user data.
|
||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetUserData(dst_sreg));
|
||||
}
|
||||
}
|
||||
|
||||
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||
IR::U32F32 value{};
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||
return ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
} else {
|
||||
return ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::VectorGPR:
|
||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||
return ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
} else {
|
||||
return ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstZero:
|
||||
if (force_flt) {
|
||||
return ir.Imm32(0.f);
|
||||
value = ir.Imm32(0.f);
|
||||
} else {
|
||||
return ir.Imm32(0U);
|
||||
value = ir.Imm32(0U);
|
||||
}
|
||||
break;
|
||||
case OperandField::SignedConstIntPos:
|
||||
ASSERT(!force_flt);
|
||||
return ir.Imm32(operand.code - SignedConstIntPosMin + 1);
|
||||
value = ir.Imm32(operand.code - SignedConstIntPosMin + 1);
|
||||
break;
|
||||
case OperandField::SignedConstIntNeg:
|
||||
ASSERT(!force_flt);
|
||||
return ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
ASSERT(!force_flt);
|
||||
return ir.Imm32(operand.code);
|
||||
if (force_flt) {
|
||||
value = ir.Imm32(std::bit_cast<float>(operand.code));
|
||||
} else {
|
||||
value = ir.Imm32(operand.code);
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
return ir.Imm32(1.f);
|
||||
value = ir.Imm32(1.f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
return ir.Imm32(0.5f);
|
||||
value = ir.Imm32(0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_2_0:
|
||||
return ir.Imm32(2.0f);
|
||||
value = ir.Imm32(2.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_4_0:
|
||||
return ir.Imm32(4.0f);
|
||||
value = ir.Imm32(4.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_0_5:
|
||||
return ir.Imm32(-0.5f);
|
||||
value = ir.Imm32(-0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
return ir.Imm32(-1.0f);
|
||||
value = ir.Imm32(-1.0f);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
value = ir.GetVccLo();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(value);
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
||||
IR::U32F32 result = value;
|
||||
if (operand.output_modifier.multiplier != 0.f) {
|
||||
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
|
||||
}
|
||||
if (operand.output_modifier.clamp) {
|
||||
result = ir.FPSaturate(value);
|
||||
}
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.SetScalarReg(IR::ScalarReg(operand.code), value);
|
||||
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
|
||||
case OperandField::VectorGPR:
|
||||
return ir.SetVectorReg(IR::VectorReg(operand.code), value);
|
||||
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
|
||||
case OperandField::VccLo:
|
||||
return ir.SetVccLo(result);
|
||||
case OperandField::VccHi:
|
||||
case OperandField::M0:
|
||||
break; // Ignore for now
|
||||
|
@ -168,6 +218,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CVT_F32_U32:
|
||||
translator.V_CVT_F32_U32(inst);
|
||||
break;
|
||||
case Opcode::V_RCP_F32:
|
||||
translator.V_RCP_F32(inst);
|
||||
break;
|
||||
case Opcode::S_SWAPPC_B64:
|
||||
ASSERT(info.stage == Stage::Vertex);
|
||||
translator.EmitFetch(inst);
|
||||
|
@ -198,18 +251,81 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CVT_PKRTZ_F16_F32:
|
||||
translator.V_CVT_PKRTZ_F16_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FRACT_F32:
|
||||
translator.V_FRACT_F32(inst);
|
||||
break;
|
||||
case Opcode::V_ADD_F32:
|
||||
translator.V_ADD_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_OFF_F32_I4:
|
||||
translator.V_CVT_OFF_F32_I4(inst);
|
||||
break;
|
||||
case Opcode::V_MED3_F32:
|
||||
translator.V_MED3_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FLOOR_F32:
|
||||
translator.V_FLOOR_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SUB_F32:
|
||||
translator.V_SUB_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FMA_F32:
|
||||
case Opcode::V_MADAK_F32: // Yes these can share the opcode
|
||||
translator.V_FMA_F32(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_U32:
|
||||
translator.V_CMP_EQ_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GT_U32:
|
||||
translator.V_CMPX_GT_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_F_F32:
|
||||
translator.V_CMP_F32(ConditionOp::F, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.TBUFFER_LOAD_FORMAT_XYZW(inst);
|
||||
translator.BUFFER_LOAD_FORMAT(4, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_X:
|
||||
translator.BUFFER_LOAD_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_F32:
|
||||
translator.V_MAX_F32(inst);
|
||||
break;
|
||||
case Opcode::S_ANDN2_B64:
|
||||
translator.S_ANDN2_B64(inst);
|
||||
break;
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_MOV_B64:
|
||||
case Opcode::S_WQM_B64:
|
||||
case Opcode::V_INTERP_P1_F32:
|
||||
|
|
|
@ -16,6 +16,7 @@ struct Info;
|
|||
namespace Shader::Gcn {
|
||||
|
||||
enum class ConditionOp : u32 {
|
||||
F,
|
||||
EQ,
|
||||
LG,
|
||||
GT,
|
||||
|
@ -28,12 +29,14 @@ class Translator {
|
|||
public:
|
||||
explicit Translator(IR::Block* block_, Info& info);
|
||||
|
||||
void EmitPrologue();
|
||||
void EmitFetch(const GcnInst& inst);
|
||||
|
||||
// Scalar ALU
|
||||
void S_MOV(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
void S_ANDN2_B64(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -53,9 +56,21 @@ public:
|
|||
void V_CVT_F32_I32(const GcnInst& inst);
|
||||
void V_CVT_F32_U32(const GcnInst& inst);
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_FRACT_F32(const GcnInst& inst);
|
||||
void V_ADD_F32(const GcnInst& inst);
|
||||
void V_CVT_OFF_F32_I4(const GcnInst& inst);
|
||||
void V_MED3_F32(const GcnInst& inst);
|
||||
void V_FLOOR_F32(const GcnInst& inst);
|
||||
void V_SUB_F32(const GcnInst& inst);
|
||||
void V_RCP_F32(const GcnInst& inst);
|
||||
void V_CMPX_GT_U32(const GcnInst& inst);
|
||||
void V_FMA_F32(const GcnInst& inst);
|
||||
void V_CMP_F32(ConditionOp op, const GcnInst& inst);
|
||||
void V_MAX_F32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
@ -76,7 +91,6 @@ private:
|
|||
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||
|
||||
private:
|
||||
IR::Block* block;
|
||||
IR::IREmitter ir;
|
||||
Info& info;
|
||||
};
|
||||
|
|
|
@ -102,4 +102,95 @@ void Translator::V_MAD_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.Fract(src0));
|
||||
}
|
||||
|
||||
void Translator::V_ADD_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(
|
||||
dst_reg,
|
||||
ir.FPMul(ir.ConvertUToF(32, 32, ir.ISub(ir.BitwiseAnd(src0, ir.Imm32(0xF)), ir.Imm32(8))),
|
||||
ir.Imm32(1.f / 16.f)));
|
||||
}
|
||||
|
||||
void Translator::V_MED3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc(inst.src[2])};
|
||||
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
|
||||
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
||||
}
|
||||
|
||||
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
|
||||
}
|
||||
|
||||
void Translator::V_SUB_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPSub(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_RCP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPRecip(src0));
|
||||
}
|
||||
|
||||
void Translator::V_CMPX_GT_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U1 result = ir.IGreaterThan(src0, src1, false);
|
||||
ir.SetVcc(result);
|
||||
ir.SetExec(result);
|
||||
}
|
||||
|
||||
void Translator::V_FMA_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::U1 result = [&] {
|
||||
switch (op) {
|
||||
case ConditionOp::F:
|
||||
return ir.Imm1(false);
|
||||
case ConditionOp::EQ:
|
||||
return ir.FPEqual(src0, src1);
|
||||
case ConditionOp::LG:
|
||||
return ir.FPNotEqual(src0, src1);
|
||||
case ConditionOp::GT:
|
||||
return ir.FPGreaterThan(src0, src1);
|
||||
case ConditionOp::LT:
|
||||
return ir.FPLessThan(src0, src1);
|
||||
case ConditionOp::LE:
|
||||
return ir.FPLessThanEqual(src0, src1);
|
||||
case ConditionOp::GE:
|
||||
return ir.FPGreaterThanEqual(src0, src1);
|
||||
}
|
||||
}();
|
||||
ir.SetVcc(result);
|
||||
}
|
||||
|
||||
void Translator::V_MAX_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
SetDst(inst.dst[0], ir.FPMax(src0, src1));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -107,7 +107,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) {
|
||||
void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
|
@ -127,15 +127,68 @@ void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) {
|
|||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
info.is_typed.Assign(1);
|
||||
info.is_typed.Assign(is_typed);
|
||||
if (is_typed) {
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
}
|
||||
|
||||
const IR::Value value = ir.LoadBuffer(4, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::F32{value});
|
||||
return;
|
||||
}
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
if (mtbuf.idxen || mtbuf.offen) {
|
||||
return ir.GetVectorReg(vaddr);
|
||||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
info.is_typed.Assign(is_typed);
|
||||
if (is_typed) {
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
}
|
||||
|
||||
IR::Value value{};
|
||||
const IR::VectorReg src_reg{inst.src[1].code};
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
value = ir.GetVectorReg(src_reg);
|
||||
break;
|
||||
case 2:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
|
||||
break;
|
||||
case 3:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
|
||||
ir.GetVectorReg(src_reg + 2));
|
||||
break;
|
||||
case 4:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
|
||||
ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
|
||||
break;
|
||||
}
|
||||
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -10,6 +10,10 @@ bool IsParam(Attribute attribute) noexcept {
|
|||
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
||||
}
|
||||
|
||||
bool IsMrt(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
||||
}
|
||||
|
||||
std::string NameOf(Attribute attribute) {
|
||||
switch (attribute) {
|
||||
case Attribute::RenderTarget0:
|
||||
|
@ -112,6 +116,12 @@ std::string NameOf(Attribute attribute) {
|
|||
return "FragCoord";
|
||||
case Attribute::IsFrontFace:
|
||||
return "IsFrontFace";
|
||||
case Attribute::WorkgroupId:
|
||||
return "WorkgroupId";
|
||||
case Attribute::LocalInvocationId:
|
||||
return "LocalInvocationId";
|
||||
case Attribute::LocalInvocationIndex:
|
||||
return "LocalInvocationIndex";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -81,6 +81,8 @@ constexpr size_t NumParams = 32;
|
|||
|
||||
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
|
||||
|
||||
[[nodiscard]] bool IsMrt(Attribute attribute) noexcept;
|
||||
|
||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||
|
||||
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
|
||||
|
|
|
@ -111,6 +111,10 @@ void IREmitter::Epilogue() {
|
|||
Inst(Opcode::Epilogue);
|
||||
}
|
||||
|
||||
void IREmitter::Discard() {
|
||||
Inst(Opcode::Discard);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
|
||||
return Inst<U32>(Opcode::GetUserData, reg);
|
||||
}
|
||||
|
@ -156,11 +160,17 @@ U1 IREmitter::Condition(IR::Condition cond) {
|
|||
case IR::Condition::True:
|
||||
return Imm1(true);
|
||||
case IR::Condition::Scc0:
|
||||
return LogicalNot(GetScc());
|
||||
case IR::Condition::Scc1:
|
||||
return GetScc();
|
||||
case IR::Condition::Vccz:
|
||||
return LogicalNot(GetVcc());
|
||||
case IR::Condition::Vccnz:
|
||||
return GetVcc();
|
||||
case IR::Condition::Execz:
|
||||
return LogicalNot(GetExec());
|
||||
case IR::Condition::Execnz:
|
||||
return GetExec();
|
||||
default:
|
||||
throw NotImplementedException("");
|
||||
}
|
||||
|
@ -170,14 +180,38 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) {
|
|||
Inst(Opcode::SetGotoVariable, id, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetScc() {
|
||||
return Inst<U1>(Opcode::GetScc);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetExec() {
|
||||
return Inst<U1>(Opcode::GetExec);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetVcc() {
|
||||
return Inst<U1>(Opcode::GetVcc);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetVccLo() {
|
||||
return Inst<U32>(Opcode::GetVccLo);
|
||||
}
|
||||
|
||||
void IREmitter::SetScc(const U1& value) {
|
||||
Inst(Opcode::SetScc, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetExec(const U1& value) {
|
||||
Inst(Opcode::SetExec, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetVcc(const U1& value) {
|
||||
Inst(Opcode::SetVcc, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetVccLo(const U32& value) {
|
||||
Inst(Opcode::SetVccLo, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
|
||||
}
|
||||
|
@ -247,6 +281,27 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
|
|||
}
|
||||
}
|
||||
|
||||
void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
const Value& data, BufferInstInfo info) {
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32,
|
||||
Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 2:
|
||||
Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 3:
|
||||
Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 4:
|
||||
Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data);
|
||||
break;
|
||||
default:
|
||||
throw InvalidArgument("Invalid number of dwords {}", num_dwords);
|
||||
}
|
||||
}
|
||||
|
||||
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
@ -261,6 +316,18 @@ F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
|||
}
|
||||
}
|
||||
|
||||
F32F64 IREmitter::FPSub(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
}
|
||||
switch (a.Type()) {
|
||||
case Type::F32:
|
||||
return Inst<F32>(Opcode::FPSub32, a, b);
|
||||
default:
|
||||
ThrowInvalidType(a.Type());
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
|
||||
if (e1.Type() != e2.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
|
||||
|
@ -612,6 +679,10 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) {
|
|||
}
|
||||
}
|
||||
|
||||
F32 IREmitter::Fract(const F32& value) {
|
||||
return Inst<F32>(Opcode::FPFract, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {
|
||||
if (lhs.Type() != rhs.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
|
||||
void Prologue();
|
||||
void Epilogue();
|
||||
void Discard();
|
||||
|
||||
U32 GetUserData(IR::ScalarReg reg);
|
||||
|
||||
|
@ -54,9 +55,14 @@ public:
|
|||
[[nodiscard]] U1 GetGotoVariable(u32 id);
|
||||
void SetGotoVariable(u32 id, const U1& value);
|
||||
|
||||
[[nodiscard]] U1 GetScc();
|
||||
[[nodiscard]] U1 GetExec();
|
||||
[[nodiscard]] U1 GetVcc();
|
||||
|
||||
[[nodiscard]] U32 GetVccLo();
|
||||
void SetScc(const U1& value);
|
||||
void SetExec(const U1& value);
|
||||
void SetVcc(const U1& value);
|
||||
void SetVccLo(const U32& value);
|
||||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
|
@ -72,6 +78,8 @@ public:
|
|||
|
||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
BufferInstInfo info);
|
||||
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
|
||||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetSignFromOp(const Value& op);
|
||||
|
@ -100,6 +108,7 @@ public:
|
|||
[[nodiscard]] Value UnpackHalf2x16(const U32& value);
|
||||
|
||||
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
|
||||
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
|
||||
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
|
||||
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
|
||||
|
||||
|
@ -121,6 +130,7 @@ public:
|
|||
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
|
||||
[[nodiscard]] F32 Fract(const F32& value);
|
||||
|
||||
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
|
|
|
@ -45,15 +45,13 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::PhiMove:
|
||||
case Opcode::Prologue:
|
||||
case Opcode::Epilogue:
|
||||
// case Opcode::Join:
|
||||
// case Opcode::Barrier:
|
||||
// case Opcode::WorkgroupMemoryBarrier:
|
||||
// case Opcode::DeviceMemoryBarrier:
|
||||
// case Opcode::EmitVertex:
|
||||
// case Opcode::EndPrimitive:
|
||||
case Opcode::Discard:
|
||||
case Opcode::SetAttribute:
|
||||
// case Opcode::SetFragColor:
|
||||
// case Opcode::SetFragDepth:
|
||||
case Opcode::StoreBufferF32:
|
||||
case Opcode::StoreBufferF32x2:
|
||||
case Opcode::StoreBufferF32x3:
|
||||
case Opcode::StoreBufferF32x4:
|
||||
case Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -12,10 +12,12 @@ OPCODE(PhiMove, Void, Opaq
|
|||
// Special operations
|
||||
OPCODE(Prologue, Void, )
|
||||
OPCODE(Epilogue, Void, )
|
||||
OPCODE(Discard, Void, )
|
||||
|
||||
// Constant memory operations
|
||||
OPCODE(ReadConst, U32, U64, U32, )
|
||||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
|
@ -30,10 +32,14 @@ OPCODE(GetAttributeU32, U32, Attr
|
|||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
|
||||
// Flags
|
||||
//OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
//OPCODE(SetScc, Void, U1, )
|
||||
OPCODE(SetVcc, Void, U1, )
|
||||
OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetExec, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
OPCODE(GetVccLo, U32, Void, )
|
||||
OPCODE(SetScc, Void, U1, )
|
||||
OPCODE(SetExec, Void, U1, )
|
||||
OPCODE(SetVcc, Void, U1, )
|
||||
OPCODE(SetVccLo, Void, U32, )
|
||||
|
||||
// Undefined
|
||||
OPCODE(UndefU1, U1, )
|
||||
|
@ -47,6 +53,12 @@ OPCODE(LoadBufferF32, F32, Opaq
|
|||
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
|
||||
OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
|
||||
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
|
||||
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
|
||||
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
|
@ -114,6 +126,7 @@ OPCODE(FPAbs32, F32, F32,
|
|||
OPCODE(FPAbs64, F64, F64, )
|
||||
OPCODE(FPAdd32, F32, F32, F32, )
|
||||
OPCODE(FPAdd64, F64, F64, F64, )
|
||||
OPCODE(FPSub32, F32, F32, F32, )
|
||||
OPCODE(FPFma32, F32, F32, F32, F32, )
|
||||
OPCODE(FPFma64, F64, F64, F64, F64, )
|
||||
OPCODE(FPMax32, F32, F32, F32, )
|
||||
|
@ -145,6 +158,7 @@ OPCODE(FPCeil32, F32, F32,
|
|||
OPCODE(FPCeil64, F64, F64, )
|
||||
OPCODE(FPTrunc32, F32, F32, )
|
||||
OPCODE(FPTrunc64, F64, F64, )
|
||||
OPCODE(FPFract, F32, F32, )
|
||||
|
||||
OPCODE(FPOrdEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdEqual64, U1, F64, F64, )
|
||||
|
|
|
@ -88,15 +88,6 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||
return;
|
||||
}
|
||||
// if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||
// if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
// // Replace the bitcast with a typed constant buffer read
|
||||
// inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
||||
// inst.SetArg(0, arg_inst->Arg(0));
|
||||
// inst.SetArg(1, arg_inst->Arg(1));
|
||||
// return;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
||||
|
@ -249,6 +240,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::IAdd32:
|
||||
return FoldAdd<u32>(block, inst);
|
||||
case IR::Opcode::ISub32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; });
|
||||
return;
|
||||
case IR::Opcode::ConvertF32U32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
|
||||
return;
|
||||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
return;
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void DeadCodeEliminationPass(IR::BlockList& program) {
|
||||
// We iterate over the instructions in reverse order.
|
||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||
for (IR::Block* const block : program) {
|
||||
auto it{block->end()};
|
||||
while (it != block->begin()) {
|
||||
--it;
|
||||
if (!it->HasUses() && !it->MayHaveSideEffects()) {
|
||||
it->Invalidate();
|
||||
it = block->Instructions().erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
34
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
Normal file
34
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <vector>
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void IdentityRemovalPass(IR::BlockList& program) {
|
||||
std::vector<IR::Inst*> to_invalidate;
|
||||
for (IR::Block* const block : program) {
|
||||
for (auto inst = block->begin(); inst != block->end();) {
|
||||
const size_t num_args{inst->NumArgs()};
|
||||
for (size_t i = 0; i < num_args; ++i) {
|
||||
IR::Value arg;
|
||||
while ((arg = inst->Arg(i)).IsIdentity()) {
|
||||
inst->SetArg(i, arg.Inst()->Arg(0));
|
||||
}
|
||||
}
|
||||
if (inst->GetOpcode() == IR::Opcode::Identity ||
|
||||
inst->GetOpcode() == IR::Opcode::Void) {
|
||||
to_invalidate.push_back(&*inst);
|
||||
inst = block->Instructions().erase(inst);
|
||||
} else {
|
||||
++inst;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IR::Inst* const inst : to_invalidate) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -2,8 +2,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <optional>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
|
@ -27,26 +25,54 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
|||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
IR::Type BufferLoadType(const IR::Inst& inst) {
|
||||
IR::Type BufferDataType(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferF32:
|
||||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
return IR::Type::F32;
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return IR::Type::U32;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
bool IsBufferStore(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsImageInstruction(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
|
@ -157,10 +183,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.stride = u32(buffer.stride),
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferLoadType(inst),
|
||||
.is_storage = /*buffer.base_address % 64 != 0*/ true,
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = true || IsBufferStore(inst),
|
||||
});
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
@ -171,17 +197,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
||||
}
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
return;
|
||||
}
|
||||
// Calculate buffer address.
|
||||
const u32 dword_stride = buffer.stride / sizeof(u32);
|
||||
const u32 dword_stride = buffer.GetStrideElements(sizeof(u32));
|
||||
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
||||
IR::U32 address = ir.Imm32(dword_offset);
|
||||
if (inst_info.index_enable && inst_info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (inst_info.index_enable) {
|
||||
const IR::U32 index{inst.Arg(1)};
|
||||
IR::U32 index{inst.Arg(1)};
|
||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||
} else if (inst_info.offset_enable) {
|
||||
const IR::U32 offset{inst.Arg(1)};
|
||||
|
@ -245,6 +272,36 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// When loading data from untyped buffer we don't have if it is float or integer.
|
||||
// Most of the time it is float so that is the default. This pass detects float buffer loads
|
||||
// combined with bitcasts and patches them to be integer loads.
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
|
||||
continue;
|
||||
}
|
||||
// Replace the bitcast with a typed buffer read
|
||||
IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()};
|
||||
if (!arg_inst) {
|
||||
continue;
|
||||
}
|
||||
const auto replace{[&](IR::Opcode new_opcode) {
|
||||
inst.ReplaceOpcode(new_opcode);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
inst.SetFlags(arg_inst->Flags<u32>());
|
||||
arg_inst->Invalidate();
|
||||
}};
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
replace(IR::Opcode::ReadConstBufferU32);
|
||||
}
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) {
|
||||
replace(IR::Opcode::LoadBufferU32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
|
|
|
@ -17,10 +17,8 @@
|
|||
#include <span>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
@ -30,11 +28,10 @@ namespace {
|
|||
struct FlagTag {
|
||||
auto operator<=>(const FlagTag&) const noexcept = default;
|
||||
};
|
||||
struct ZeroFlagTag : FlagTag {};
|
||||
struct SignFlagTag : FlagTag {};
|
||||
struct CarryFlagTag : FlagTag {};
|
||||
struct OverflowFlagTag : FlagTag {};
|
||||
struct SccFlagTag : FlagTag {};
|
||||
struct ExecFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
struct VccLoTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
GotoVariable() = default;
|
||||
|
@ -45,8 +42,8 @@ struct GotoVariable : FlagTag {
|
|||
u32 index;
|
||||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, ZeroFlagTag, SignFlagTag, CarryFlagTag,
|
||||
OverflowFlagTag, GotoVariable, VccFlagTag>;
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, VccLoTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
|
@ -71,32 +68,25 @@ struct DefTable {
|
|||
goto_vars[variable.index].insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
|
||||
return zero_flag[block];
|
||||
const IR::Value& Def(IR::Block* block, SccFlagTag) {
|
||||
return scc_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
|
||||
zero_flag.insert_or_assign(block, value);
|
||||
void SetDef(IR::Block* block, SccFlagTag, const IR::Value& value) {
|
||||
scc_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, SignFlagTag) {
|
||||
return sign_flag[block];
|
||||
const IR::Value& Def(IR::Block* block, ExecFlagTag) {
|
||||
return exec_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
|
||||
sign_flag.insert_or_assign(block, value);
|
||||
void SetDef(IR::Block* block, ExecFlagTag, const IR::Value& value) {
|
||||
exec_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, CarryFlagTag) {
|
||||
return carry_flag[block];
|
||||
const IR::Value& Def(IR::Block* block, VccLoTag) {
|
||||
return vcc_lo_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
|
||||
carry_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
|
||||
return overflow_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
|
||||
overflow_flag.insert_or_assign(block, value);
|
||||
void SetDef(IR::Block* block, VccLoTag, const IR::Value& value) {
|
||||
vcc_lo_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, VccFlagTag) {
|
||||
|
@ -107,12 +97,10 @@ struct DefTable {
|
|||
}
|
||||
|
||||
std::unordered_map<u32, ValueMap> goto_vars;
|
||||
ValueMap indirect_branch_var;
|
||||
ValueMap zero_flag;
|
||||
ValueMap sign_flag;
|
||||
ValueMap carry_flag;
|
||||
ValueMap overflow_flag;
|
||||
ValueMap scc_flag;
|
||||
ValueMap exec_flag;
|
||||
ValueMap vcc_flag;
|
||||
ValueMap vcc_lo_flag;
|
||||
};
|
||||
|
||||
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
|
||||
|
@ -306,18 +294,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::SetGotoVariable:
|
||||
pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
|
||||
break;
|
||||
case IR::Opcode::SetExec:
|
||||
pass.WriteVariable(ExecFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetScc:
|
||||
pass.WriteVariable(SccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetVcc:
|
||||
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
// case IR::Opcode::SetSFlag:
|
||||
// pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
|
||||
// break;
|
||||
// case IR::Opcode::SetCFlag:
|
||||
// pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
|
||||
// break;
|
||||
// case IR::Opcode::SetOFlag:
|
||||
// pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
|
||||
// break;
|
||||
case IR::Opcode::SetVccLo:
|
||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::GetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
|
||||
|
@ -331,18 +319,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::GetGotoVariable:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
|
||||
break;
|
||||
case IR::Opcode::GetExec:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(ExecFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetScc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(SccFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetVcc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
||||
break;
|
||||
// case IR::Opcode::GetSFlag:
|
||||
// inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
|
||||
// break;
|
||||
// case IR::Opcode::GetCFlag:
|
||||
// inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
|
||||
// break;
|
||||
// case IR::Opcode::GetOFlag:
|
||||
// inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
|
||||
// break;
|
||||
case IR::Opcode::GetVccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -365,44 +353,4 @@ void SsaRewritePass(IR::BlockList& program) {
|
|||
}
|
||||
}
|
||||
|
||||
void IdentityRemovalPass(IR::BlockList& program) {
|
||||
std::vector<IR::Inst*> to_invalidate;
|
||||
for (IR::Block* const block : program) {
|
||||
for (auto inst = block->begin(); inst != block->end();) {
|
||||
const size_t num_args{inst->NumArgs()};
|
||||
for (size_t i = 0; i < num_args; ++i) {
|
||||
IR::Value arg;
|
||||
while ((arg = inst->Arg(i)).IsIdentity()) {
|
||||
inst->SetArg(i, arg.Inst()->Arg(0));
|
||||
}
|
||||
}
|
||||
if (inst->GetOpcode() == IR::Opcode::Identity ||
|
||||
inst->GetOpcode() == IR::Opcode::Void) {
|
||||
to_invalidate.push_back(&*inst);
|
||||
inst = block->Instructions().erase(inst);
|
||||
} else {
|
||||
++inst;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IR::Inst* const inst : to_invalidate) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
void DeadCodeEliminationPass(IR::BlockList& program) {
|
||||
// We iterate over the instructions in reverse order.
|
||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||
for (IR::Block* const block : program) {
|
||||
auto it{block->end()};
|
||||
while (it != block->begin()) {
|
||||
--it;
|
||||
if (!it->HasUses() && !it->MayHaveSideEffects()) {
|
||||
it->Invalidate();
|
||||
it = block->Instructions().erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
|
@ -4,9 +4,8 @@
|
|||
#include "shader_recompiler/frontend/control_flow_graph.h"
|
||||
#include "shader_recompiler/frontend/decode.h"
|
||||
#include "shader_recompiler/frontend/structured_control_flow.h"
|
||||
#include "shader_recompiler/ir/passes/passes.h"
|
||||
#include "shader_recompiler/ir/passes/ir_passes.h"
|
||||
#include "shader_recompiler/ir/post_order.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
|
@ -62,9 +61,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
|
||||
Shader::Optimization::CollectShaderInfoPass(program);
|
||||
|
||||
for (const auto& block : program.blocks) {
|
||||
fmt::print("{}\n", IR::DumpBlock(*block));
|
||||
}
|
||||
fmt::print("{}\n", Shader::IR::DumpProgram(program));
|
||||
std::fflush(stdout);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -120,6 +120,9 @@ struct Info {
|
|||
ImageResourceList images;
|
||||
SamplerResourceList samplers;
|
||||
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
|
||||
u32 num_user_data;
|
||||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue