shader_recompiler: Check usage before enabling capabilities (#245)

* vk_instance: Better feature check

* shader_recompiler: Make most features optional

* vk_instance: Bump extension vector size

* resource_tracking_pass: Perform BFS for sharp tracking

* The Witness triggered this
This commit is contained in:
TheTurtle 2024-07-06 02:42:16 +03:00 committed by GitHub
parent 67af53fd58
commit 38080b60af
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 151 additions and 61 deletions

View file

@ -174,14 +174,18 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
}
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const auto& info = program.info;
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
if (info.uses_fp16) {
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);
}
if (info.has_storage_images) {
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
}
switch (program.info.stage) {
case Stage::Compute: {
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
@ -200,13 +204,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
} else {
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
}
if (program.info.uses_group_quad) {
if (info.uses_group_quad) {
ctx.AddCapability(spv::Capability::GroupNonUniform);
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
}
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
ctx.AddCapability(spv::Capability::ImageGatherExtended);
ctx.AddCapability(spv::Capability::ImageQuery);
if (info.has_discard) {
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
}
if (info.has_image_gather) {
ctx.AddCapability(spv::Capability::ImageGatherExtended);
}
if (info.has_image_query) {
ctx.AddCapability(spv::Capability::ImageQuery);
}
// if (program.info.stores_frag_depth) {
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
// }

View file

@ -74,21 +74,19 @@ Id EmitContext::Def(const IR::Value& value) {
void EmitContext::DefineArithmeticTypes() {
void_id = Name(TypeVoid(), "void_id");
U1[1] = Name(TypeBool(), "bool_id");
F16[1] = Name(TypeFloat(16), "f16_id");
if (info.uses_fp16) {
F16[1] = Name(TypeFloat(16), "f16_id");
U16 = Name(TypeUInt(16), "u16_id");
}
F32[1] = Name(TypeFloat(32), "f32_id");
// F64[1] = Name(TypeFloat(64), "f64_id");
S32[1] = Name(TypeSInt(32), "i32_id");
U32[1] = Name(TypeUInt(32), "u32_id");
// U8 = Name(TypeSInt(8), "u8");
// S8 = Name(TypeUInt(8), "s8");
U16 = Name(TypeUInt(16), "u16_id");
// S16 = Name(TypeSInt(16), "s16_id");
// U64 = Name(TypeUInt(64), "u64_id");
for (u32 i = 2; i <= 4; i++) {
F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
if (info.uses_fp16) {
F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
}
F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
// F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));
U32[i] = Name(TypeVector(U32[1], i), fmt::format("u32vec{}_id", i));
U1[i] = Name(TypeVector(U1[1], i), fmt::format("bvec{}_id", i));

View file

@ -396,6 +396,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
case Opcode::IMAGE_SAMPLE_C_LZ_O:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:

View file

@ -0,0 +1,52 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <optional>
#include <type_traits>
#include <boost/container/small_vector.hpp>
#include <queue>
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
template <typename Pred>
auto BreadthFirstSearch(const Value& value, Pred&& pred)
-> std::invoke_result_t<Pred, const Inst*> {
if (value.IsImmediate()) {
// Nothing to do with immediates
return std::nullopt;
}
// Breadth-first search visiting the right most arguments first
boost::container::small_vector<const Inst*, 2> visited;
std::queue<const Inst*> queue;
queue.push(value.InstRecursive());
while (!queue.empty()) {
// Pop one instruction from the queue
const Inst* const inst{queue.front()};
queue.pop();
if (const std::optional result = pred(inst)) {
// This is the instruction we were looking for
return result;
}
// Visit the right most arguments first
for (size_t arg = inst->NumArgs(); arg--;) {
const Value arg_value{inst->Arg(arg)};
if (arg_value.IsImmediate()) {
continue;
}
// Queue instruction if it hasn't been visited
const Inst* const arg_inst{arg_value.InstRecursive()};
if (std::ranges::find(visited, arg_inst) == visited.end()) {
visited.push_back(arg_inst);
queue.push(arg_inst);
}
}
}
// SSA tree has been traversed and the result hasn't been found
return std::nullopt;
}
} // namespace Shader::IR

View file

@ -4,8 +4,8 @@
#include <algorithm>
#include <deque>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
@ -244,22 +244,19 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
// Retrieve SGPR pair that holds sbase
const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
while (sbase0->GetOpcode() == IR::Opcode::Phi) {
sbase0 = sbase0->Arg(0).TryInstRecursive();
}
while (sbase1->GetOpcode() == IR::Opcode::Phi) {
sbase1 = sbase1->Arg(0).TryInstRecursive();
}
ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
sbase1->GetOpcode() == IR::Opcode::GetUserData,
"Nested resource loads not supported");
const IR::ScalarReg base = sbase0->Arg(0).ScalarReg();
const auto pred = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst->Arg(0).ScalarReg();
}
return std::nullopt;
};
const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred);
const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred);
ASSERT_MSG(base0 && base1, "Nested resource loads not supported");
// Return retrieved location.
return SharpLocation{
.sgpr_base = u32(base),
.sgpr_base = u32(base0.value()),
.dword_offset = dword_offset,
};
}

View file

@ -26,9 +26,27 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::WriteSharedU16:
info.uses_shared_u16 = true;
break;
case IR::Opcode::ConvertF32F16:
case IR::Opcode::BitCastF16U16:
info.uses_fp16 = true;
break;
case IR::Opcode::ImageWrite:
info.has_storage_images = true;
break;
case IR::Opcode::QuadShuffle:
info.uses_group_quad = true;
break;
case IR::Opcode::Discard:
info.has_discard = true;
break;
case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref:
info.has_image_gather = true;
break;
case IR::Opcode::ImageQueryDimensions:
case IR::Opcode::ImageQueryLod:
info.has_image_query = true;
break;
default:
break;
}

View file

@ -169,9 +169,14 @@ struct Info {
uintptr_t pgm_base{};
u64 pgm_hash{};
u32 shared_memory_size{};
bool has_storage_images{};
bool has_discard{};
bool has_image_gather{};
bool has_image_query{};
bool uses_group_quad{};
bool uses_shared_u8{};
bool uses_shared_u16{};
bool uses_fp16{};
bool translation_failed{}; // indicates that shader has unsupported instructions
template <typename T>