mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-04 08:06:20 +00:00
video_core: Add fallback path for pipelines with more than 32 bindings (#837)
* video_core: Small fixes * renderer_vulkan: Add fallback path for pipelines with more than 32 bindings * vk_resource_pool: Rewrite desc heap * work
This commit is contained in:
parent
3a65052b8e
commit
b0bbb16aae
27 changed files with 223 additions and 148 deletions
|
@ -208,6 +208,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
if (info.uses_group_ballot) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||
}
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||
|
|
|
@ -305,7 +305,7 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
|
|||
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
|
||||
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
|
||||
if (buffer.is_integer) {
|
||||
value = ctx.OpBitcast(ctx.U32[4], value);
|
||||
value = ctx.OpBitcast(ctx.S32[4], value);
|
||||
}
|
||||
ctx.OpImageWrite(tex_buffer, coord, value);
|
||||
}
|
||||
|
|
|
@ -27,7 +27,8 @@ Id EmitReadFirstLane(EmitContext& ctx, Id value) {
|
|||
}
|
||||
|
||||
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
|
||||
UNREACHABLE();
|
||||
return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value,
|
||||
ctx.ConstU32(lane));
|
||||
}
|
||||
|
||||
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
|
||||
|
|
|
@ -324,16 +324,18 @@ void EmitContext::DefineOutputs() {
|
|||
|
||||
void EmitContext::DefinePushDataBlock() {
|
||||
// Create push constants block for instance steps rates
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")};
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "sr0");
|
||||
MemberName(struct_type, 1, "sr1");
|
||||
MemberName(struct_type, 2, "buf_offsets0");
|
||||
MemberName(struct_type, 3, "buf_offsets1");
|
||||
MemberName(struct_type, 4, "buf_offsets2");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
|
||||
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
|
||||
MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U);
|
||||
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(push_data_block, "push_data");
|
||||
interfaces.push_back(push_data_block);
|
||||
|
|
|
@ -171,7 +171,7 @@ T Translator::GetSrc(const InstOperand& operand) {
|
|||
}
|
||||
} else {
|
||||
if (operand.input_modifier.abs) {
|
||||
LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction");
|
||||
value = ir.IAbs(value);
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
UNREACHABLE();
|
||||
|
|
|
@ -117,6 +117,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
||||
case Opcode::BUFFER_ATOMIC_SWAP:
|
||||
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
|
||||
case Opcode::BUFFER_ATOMIC_UMIN:
|
||||
return BUFFER_ATOMIC(AtomicOp::Umin, inst);
|
||||
case Opcode::BUFFER_ATOMIC_UMAX:
|
||||
return BUFFER_ATOMIC(AtomicOp::Umax, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -280,6 +284,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
|||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
// info.explicit_lod.Assign(explicit_lod);
|
||||
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
@ -89,7 +88,7 @@ struct PushData {
|
|||
|
||||
u32 step0;
|
||||
u32 step1;
|
||||
std::array<u8, 32> buf_offsets;
|
||||
std::array<u8, 48> buf_offsets;
|
||||
|
||||
void AddOffset(u32 binding, u32 offset) {
|
||||
ASSERT(offset < 256 && binding < buf_offsets.size());
|
||||
|
@ -166,6 +165,7 @@ struct Info {
|
|||
bool has_image_query{};
|
||||
bool uses_lane_id{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_group_ballot{};
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_step_rates{};
|
||||
|
@ -181,6 +181,7 @@ struct Info {
|
|||
const u32* base = user_data.data();
|
||||
if (ptr_index != IR::NumScalarRegs) {
|
||||
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
||||
base = reinterpret_cast<const u32*>(VAddr(base) & 0xFFFFFFFFFFFFULL);
|
||||
}
|
||||
std::memcpy(&data, base + dword_offset, sizeof(T));
|
||||
return data;
|
||||
|
|
|
@ -21,8 +21,7 @@ void LowerSharedMemToRegisters(IR::Program& program) {
|
|||
const IR::Inst* prod = inst.Arg(0).InstRecursive();
|
||||
const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
|
||||
const IR::Inst* write_prod = write->Arg(0).InstRecursive();
|
||||
return write_prod->Arg(1).U32() == prod->Arg(1).U32() &&
|
||||
write_prod->Arg(0) == prod->Arg(0);
|
||||
return write_prod->Arg(1).U32() == prod->Arg(1).U32();
|
||||
});
|
||||
ASSERT(it != ds_writes.end());
|
||||
// Replace data read with value written.
|
||||
|
|
|
@ -98,22 +98,7 @@ bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
|||
}
|
||||
|
||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::LoadBufferU32x2:
|
||||
case IR::Opcode::LoadBufferU32x3:
|
||||
case IR::Opcode::LoadBufferU32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::BufferAtomicIAdd32:
|
||||
case IR::Opcode::BufferAtomicSwap32:
|
||||
return IR::Type::U32;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
return IR::Type::U32;
|
||||
}
|
||||
|
||||
bool IsImageAtomicInstruction(const IR::Inst& inst) {
|
||||
|
@ -223,12 +208,8 @@ public:
|
|||
|
||||
u32 Add(const SamplerResource& desc) {
|
||||
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
|
||||
if (desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset) {
|
||||
return true;
|
||||
}
|
||||
// Samplers with different bindings might still be the same.
|
||||
return existing.GetSharp(info) == desc.GetSharp(info);
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
})};
|
||||
return index;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,11 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::QuadShuffle:
|
||||
info.uses_group_quad = true;
|
||||
break;
|
||||
case IR::Opcode::ReadLane:
|
||||
case IR::Opcode::ReadFirstLane:
|
||||
case IR::Opcode::WriteLane:
|
||||
info.uses_group_ballot = true;
|
||||
break;
|
||||
case IR::Opcode::Discard:
|
||||
case IR::Opcode::DiscardCond:
|
||||
info.has_discard = true;
|
||||
|
|
|
@ -37,14 +37,14 @@ struct ImageSpecialization {
|
|||
* after the first compilation of a module.
|
||||
*/
|
||||
struct StageSpecialization {
|
||||
static constexpr size_t MaxStageResources = 32;
|
||||
static constexpr size_t MaxStageResources = 64;
|
||||
|
||||
const Shader::Info* info;
|
||||
RuntimeInfo runtime_info;
|
||||
std::bitset<MaxStageResources> bitset{};
|
||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||
boost::container::small_vector<ImageSpecialization, 8> images;
|
||||
boost::container::small_vector<ImageSpecialization, 16> images;
|
||||
u32 start_binding{};
|
||||
|
||||
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue