shader_recompiler: Small instruction parsing refactor/bugfixes (#340)

* translator: Implemtn f32 to f16 convert

* shader_recompiler: Add bit instructions

* shader_recompiler: More data share instructions

* shader_recompiler: Remove exec contexts, fix S_MOV_B64

* shader_recompiler: Split instruction parsing into categories

* shader_recompiler: Better BFS search

* shader_recompiler: Constant propagation pass for cmp_class_f32

* shader_recompiler: Partial readfirstlane implementation

* shader_recompiler: Stub readlane/writelane only for non-compute

* hack: Fix swizzle on RDR

* Will properly fix this when merging this

* clang format

* address_space: Bump user area size to full

* shader_recompiler: V_INTERP_MOV_F32

* Should work the same as spirv will emit flat decoration on demand

* kernel: Add MAP_OP_MAP_FLEXIBLE

* image_view: Attempt to apply storage swizzle on format

* vk_scheduler: Barrier attachments on renderpass end

* clang format

* liverpool: cs state backup

* shader_recompiler: More instructions and formats

* vector_alu: Proper V_MBCNT_U32_B32

* shader_recompiler: Port some dark souls things

* file_system: Implement sceKernelRename

* more formats

* clang format

* resource_tracking_pass: Back to assert

* translate: Tracedata

* kernel: Remove tracy lock

* Solves random crashes in Dark Souls

* code: Review comments
This commit is contained in:
TheTurtle 2024-07-31 00:32:40 +03:00 committed by GitHub
parent ac6dc20c3b
commit a7c9bfa5c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 1349 additions and 904 deletions

View file

@ -183,6 +183,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);
}
ctx.AddCapability(spv::Capability::Int64);
if (info.has_storage_images) {
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
}
@ -204,8 +205,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
} else {
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
}
ctx.AddCapability(spv::Capability::GroupNonUniform);
if (info.uses_group_quad) {
ctx.AddCapability(spv::Capability::GroupNonUniform);
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
}
if (info.has_discard) {
@ -217,9 +218,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
if (info.has_image_query) {
ctx.AddCapability(spv::Capability::ImageQuery);
}
// if (program.info.stores_frag_depth) {
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
// }
if (info.stores.Get(IR::Attribute::Depth)) {
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
break;
default:
throw NotImplementedException("Stage {}", u32(program.info.stage));

View file

@ -6,8 +6,8 @@
namespace Shader::Backend::SPIRV {
void EmitBitCastU16F16(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
Id EmitBitCastU16F16(EmitContext& ctx, Id value) {
return ctx.OpBitcast(ctx.U16, value);
}
Id EmitBitCastU32F32(EmitContext& ctx, Id value) {

View file

@ -120,6 +120,7 @@ void EmitGetGotoVariable(EmitContext&) {
}
Id EmitReadConst(EmitContext& ctx) {
return ctx.u32_zero_value;
UNREACHABLE_MSG("Unreachable instruction");
}
@ -149,6 +150,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.is_default) {
return ctx.OpCompositeExtract(param.component_type, param.id, comp);
}
if (param.num_components > 1) {
const Id pointer{
@ -208,7 +212,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, value);
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
}
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {

View file

@ -259,4 +259,8 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value) {
return ctx.OpUConvert(ctx.U16, value);
}
Id EmitConvertU32U16(EmitContext& ctx, Id value) {
return ctx.OpUConvert(ctx.U32[1], value);
}
} // namespace Shader::Backend::SPIRV

View file

@ -385,4 +385,8 @@ Id EmitFPIsInf64(EmitContext& ctx, Id value) {
return ctx.OpIsInf(ctx.U1[1], value);
}
void EmitFPCmpClass32(EmitContext&) {
UNREACHABLE();
}
} // namespace Shader::Backend::SPIRV

View file

@ -70,7 +70,6 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id o
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.Add(spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
operands.operands);
}
@ -106,8 +105,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
const auto type = ctx.info.images[handle & 0xFFFF].type;
const Id zero = ctx.u32_zero_value;
const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa &&
type != AmdGpu::ImageType::Buffer};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa};
const auto query{[&](Id type) {
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
: ctx.OpImageQuerySize(type, image);

View file

@ -42,6 +42,7 @@ void EmitSetVcc(EmitContext& ctx);
void EmitSetSccLo(EmitContext& ctx);
void EmitSetVccLo(EmitContext& ctx);
void EmitSetVccHi(EmitContext& ctx);
void EmitFPCmpClass32(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx);
void EmitDiscard(EmitContext& ctx);
@ -148,7 +149,7 @@ Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
void EmitBitCastU16F16(EmitContext& ctx);
Id EmitBitCastU16F16(EmitContext& ctx, Id value);
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
void EmitBitCastU64F64(EmitContext& ctx);
Id EmitBitCastF16U16(EmitContext& ctx, Id value);
@ -282,6 +283,7 @@ Id EmitBitCount32(EmitContext& ctx, Id value);
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
Id EmitFindSMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb32(EmitContext& ctx, Id value);
Id EmitFindILsb32(EmitContext& ctx, Id value);
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
Id EmitUMin32(EmitContext& ctx, Id a, Id b);
Id EmitSMax32(EmitContext& ctx, Id a, Id b);
@ -353,6 +355,7 @@ Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitConvertU32U16(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);
@ -387,6 +390,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
} // namespace Shader::Backend::SPIRV

View file

@ -198,6 +198,10 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
return ctx.OpFindUMsb(ctx.U32[1], value);
}
Id EmitFindILsb32(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U32[1], value);
}
Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
return ctx.OpSMin(ctx.U32[1], a, b);
}

View file

@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) {
return ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
}
Id EmitWarpId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_id);
}
Id EmitLaneId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
}

View file

@ -49,7 +49,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
DefineInterfaces(program);
DefineBuffers(info);
DefineImagesAndSamplers(info);
DefineSharedMemory(info);
DefineSharedMemory();
}
EmitContext::~EmitContext() = default;
@ -86,6 +86,7 @@ void EmitContext::DefineArithmeticTypes() {
F32[1] = Name(TypeFloat(32), "f32_id");
S32[1] = Name(TypeSInt(32), "i32_id");
U32[1] = Name(TypeUInt(32), "u32_id");
U64 = Name(TypeUInt(64), "u64_id");
for (u32 i = 2; i <= 4; i++) {
if (info.uses_fp16) {
@ -126,6 +127,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
@ -146,6 +148,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
@ -204,7 +207,9 @@ void EmitContext::DefineInputs(const Info& info) {
: 1;
// Note that we pass index rather than Id
input_params[input.binding] = {
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
rate_idx, input_u32,
U32[1], input.num_components,
false, input.instance_data_buf,
};
} else {
Id id{DefineInput(type, input.binding)};
@ -220,19 +225,18 @@ void EmitContext::DefineInputs(const Info& info) {
break;
}
case Stage::Fragment:
if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input);
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
for (const auto& input : info.ps_inputs) {
const u32 semantic = input.param_index;
if (input.is_default) {
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), input_f32,
F32[1]};
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
F32[1], 4, true};
continue;
}
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
@ -392,7 +396,16 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rgba8ui;
}
UNREACHABLE();
if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R11fG11fB10f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rgba32f;
}
UNREACHABLE_MSG("Unknown storage format data_format={}, num_format={}", image.GetDataFmt(),
image.GetNumberFmt());
}
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
@ -412,8 +425,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
case AmdGpu::ImageType::Buffer:
throw NotImplementedException("Image buffer");
default:
break;
}
@ -471,10 +482,14 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
}
}
void EmitContext::DefineSharedMemory(const Info& info) {
if (info.shared_memory_size == 0) {
void EmitContext::DefineSharedMemory() {
static constexpr size_t DefaultSharedMemSize = 16_KB;
if (!info.uses_shared) {
return;
}
if (info.shared_memory_size == 0) {
info.shared_memory_size = DefaultSharedMemSize;
}
const auto make{[&](Id element_type, u32 element_size) {
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};

View file

@ -180,6 +180,7 @@ public:
Id workgroup_id{};
Id local_invocation_id{};
Id subgroup_id{};
Id subgroup_local_invocation_id{};
Id image_u32{};
@ -219,6 +220,7 @@ public:
Id pointer_type;
Id component_type;
u32 num_components;
bool is_default{};
s32 buffer_handle{-1};
};
std::array<SpirvAttribute, 32> input_params{};
@ -231,7 +233,7 @@ private:
void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info);
void DefineImagesAndSamplers(const Info& info);
void DefineSharedMemory(const Info& info);
void DefineSharedMemory();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};