mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-19 09:54:54 +00:00
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implemtn f32 to f16 convert * shader_recompiler: Add bit instructions * shader_recompiler: More data share instructions * shader_recompiler: Remove exec contexts, fix S_MOV_B64 * shader_recompiler: Split instruction parsing into categories * shader_recompiler: Better BFS search * shader_recompiler: Constant propagation pass for cmp_class_f32 * shader_recompiler: Partial readfirstlane implementation * shader_recompiler: Stub readlane/writelane only for non-compute * hack: Fix swizzle on RDR * Will properly fix this when merging this * clang format * address_space: Bump user area size to full * shader_recompiler: V_INTERP_MOV_F32 * Should work the same as spirv will emit flat decoration on demand * kernel: Add MAP_OP_MAP_FLEXIBLE * image_view: Attempt to apply storage swizzle on format * vk_scheduler: Barrier attachments on renderpass end * clang format * liverpool: cs state backup * shader_recompiler: More instructions and formats * vector_alu: Proper V_MBCNT_U32_B32 * shader_recompiler: Port some dark souls things * file_system: Implement sceKernelRename * more formats * clang format * resource_tracking_pass: Back to assert * translate: Tracedata * kernel: Remove tracy lock * Solves random crashes in Dark Souls * code: Review comments
This commit is contained in:
parent
ac6dc20c3b
commit
a7c9bfa5c5
66 changed files with 1349 additions and 904 deletions
|
@ -183,6 +183,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
ctx.AddCapability(spv::Capability::Float16);
|
||||
ctx.AddCapability(spv::Capability::Int16);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::Int64);
|
||||
if (info.has_storage_images) {
|
||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||
}
|
||||
|
@ -204,8 +205,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
} else {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
if (info.has_discard) {
|
||||
|
@ -217,9 +218,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
// if (program.info.stores_frag_depth) {
|
||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
// }
|
||||
if (info.stores.Get(IR::Attribute::Depth)) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
void EmitBitCastU16F16(EmitContext&) {
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
Id EmitBitCastU16F16(EmitContext& ctx, Id value) {
|
||||
return ctx.OpBitcast(ctx.U16, value);
|
||||
}
|
||||
|
||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
||||
|
|
|
@ -120,6 +120,7 @@ void EmitGetGotoVariable(EmitContext&) {
|
|||
}
|
||||
|
||||
Id EmitReadConst(EmitContext& ctx) {
|
||||
return ctx.u32_zero_value;
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
|
@ -149,6 +150,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
// Attribute is disabled or varying component is not written
|
||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||
}
|
||||
if (param.is_default) {
|
||||
return ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
||||
}
|
||||
|
||||
if (param.num_components > 1) {
|
||||
const Id pointer{
|
||||
|
@ -208,7 +212,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
|
||||
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
||||
ctx.OpStore(pointer, value);
|
||||
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
|
|
|
@ -259,4 +259,8 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value) {
|
|||
return ctx.OpUConvert(ctx.U16, value);
|
||||
}
|
||||
|
||||
Id EmitConvertU32U16(EmitContext& ctx, Id value) {
|
||||
return ctx.OpUConvert(ctx.U32[1], value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -385,4 +385,8 @@ Id EmitFPIsInf64(EmitContext& ctx, Id value) {
|
|||
return ctx.OpIsInf(ctx.U1[1], value);
|
||||
}
|
||||
|
||||
void EmitFPCmpClass32(EmitContext&) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -70,7 +70,6 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id o
|
|||
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
||||
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
@ -106,8 +105,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
|
|||
const auto type = ctx.info.images[handle & 0xFFFF].type;
|
||||
const Id zero = ctx.u32_zero_value;
|
||||
const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }};
|
||||
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa &&
|
||||
type != AmdGpu::ImageType::Buffer};
|
||||
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa};
|
||||
const auto query{[&](Id type) {
|
||||
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
|
||||
: ctx.OpImageQuerySize(type, image);
|
||||
|
|
|
@ -42,6 +42,7 @@ void EmitSetVcc(EmitContext& ctx);
|
|||
void EmitSetSccLo(EmitContext& ctx);
|
||||
void EmitSetVccLo(EmitContext& ctx);
|
||||
void EmitSetVccHi(EmitContext& ctx);
|
||||
void EmitFPCmpClass32(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
void EmitEpilogue(EmitContext& ctx);
|
||||
void EmitDiscard(EmitContext& ctx);
|
||||
|
@ -148,7 +149,7 @@ Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
|||
Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
||||
Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
||||
Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
||||
void EmitBitCastU16F16(EmitContext& ctx);
|
||||
Id EmitBitCastU16F16(EmitContext& ctx, Id value);
|
||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
|
||||
void EmitBitCastU64F64(EmitContext& ctx);
|
||||
Id EmitBitCastF16U16(EmitContext& ctx, Id value);
|
||||
|
@ -282,6 +283,7 @@ Id EmitBitCount32(EmitContext& ctx, Id value);
|
|||
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
|
||||
Id EmitFindSMsb32(EmitContext& ctx, Id value);
|
||||
Id EmitFindUMsb32(EmitContext& ctx, Id value);
|
||||
Id EmitFindILsb32(EmitContext& ctx, Id value);
|
||||
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitUMin32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSMax32(EmitContext& ctx, Id a, Id b);
|
||||
|
@ -353,6 +355,7 @@ Id EmitConvertF64U16(EmitContext& ctx, Id value);
|
|||
Id EmitConvertF64U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertU16U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertU32U16(EmitContext& ctx, Id value);
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset);
|
||||
|
@ -387,6 +390,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
|||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitWarpId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -198,6 +198,10 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
|
|||
return ctx.OpFindUMsb(ctx.U32[1], value);
|
||||
}
|
||||
|
||||
Id EmitFindILsb32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpFindILsb(ctx.U32[1], value);
|
||||
}
|
||||
|
||||
Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpSMin(ctx.U32[1], a, b);
|
||||
}
|
||||
|
|
|
@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) {
|
|||
return ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
|
||||
}
|
||||
|
||||
Id EmitWarpId(EmitContext& ctx) {
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_id);
|
||||
}
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx) {
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
|
|||
DefineInterfaces(program);
|
||||
DefineBuffers(info);
|
||||
DefineImagesAndSamplers(info);
|
||||
DefineSharedMemory(info);
|
||||
DefineSharedMemory();
|
||||
}
|
||||
|
||||
EmitContext::~EmitContext() = default;
|
||||
|
@ -86,6 +86,7 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
F32[1] = Name(TypeFloat(32), "f32_id");
|
||||
S32[1] = Name(TypeSInt(32), "i32_id");
|
||||
U32[1] = Name(TypeUInt(32), "u32_id");
|
||||
U64 = Name(TypeUInt(64), "u64_id");
|
||||
|
||||
for (u32 i = 2; i <= 4; i++) {
|
||||
if (info.uses_fp16) {
|
||||
|
@ -126,6 +127,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32[4];
|
||||
|
@ -146,6 +148,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return {id, input_u32, U32[1], 4};
|
||||
|
@ -204,7 +207,9 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
: 1;
|
||||
// Note that we pass index rather than Id
|
||||
input_params[input.binding] = {
|
||||
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
|
||||
rate_idx, input_u32,
|
||||
U32[1], input.num_components,
|
||||
false, input.instance_data_buf,
|
||||
};
|
||||
} else {
|
||||
Id id{DefineInput(type, input.binding)};
|
||||
|
@ -220,19 +225,18 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
if (info.uses_group_quad) {
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input);
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
for (const auto& input : info.ps_inputs) {
|
||||
const u32 semantic = input.param_index;
|
||||
if (input.is_default) {
|
||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), input_f32,
|
||||
F32[1]};
|
||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
|
||||
F32[1], 4, true};
|
||||
continue;
|
||||
}
|
||||
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
|
||||
|
@ -392,7 +396,16 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||
return spv::ImageFormat::Rgba8ui;
|
||||
}
|
||||
UNREACHABLE();
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::R11fG11fB10f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::Rgba32f;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown storage format data_format={}, num_format={}", image.GetDataFmt(),
|
||||
image.GetNumberFmt());
|
||||
}
|
||||
|
||||
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
||||
|
@ -412,8 +425,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Buffer:
|
||||
throw NotImplementedException("Image buffer");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -471,10 +482,14 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory(const Info& info) {
|
||||
if (info.shared_memory_size == 0) {
|
||||
void EmitContext::DefineSharedMemory() {
|
||||
static constexpr size_t DefaultSharedMemSize = 16_KB;
|
||||
if (!info.uses_shared) {
|
||||
return;
|
||||
}
|
||||
if (info.shared_memory_size == 0) {
|
||||
info.shared_memory_size = DefaultSharedMemSize;
|
||||
}
|
||||
const auto make{[&](Id element_type, u32 element_size) {
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
|
||||
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
||||
|
|
|
@ -180,6 +180,7 @@ public:
|
|||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id subgroup_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
|
||||
|
@ -219,6 +220,7 @@ public:
|
|||
Id pointer_type;
|
||||
Id component_type;
|
||||
u32 num_components;
|
||||
bool is_default{};
|
||||
s32 buffer_handle{-1};
|
||||
};
|
||||
std::array<SpirvAttribute, 32> input_params{};
|
||||
|
@ -231,7 +233,7 @@ private:
|
|||
void DefineOutputs(const Info& info);
|
||||
void DefineBuffers(const Info& info);
|
||||
void DefineImagesAndSamplers(const Info& info);
|
||||
void DefineSharedMemory(const Info& info);
|
||||
void DefineSharedMemory();
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue