mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-04 16:16:20 +00:00
Image subresources barriers (#904)
* video_core: texture: image subresources state tracking * shader_recompiler: use one binding if the same image is read and written * video_core: added rebinding of changed textures after overlap resolve * don't use pointers; slight `FindTexture` refactoring * video_core: buffer_cache: don't copy over the image size * redundant barriers removed; fixes * regression fixes * texture_cache: 3d texture layers count fixup * shader_recompiler: support for partially bound cubemaps * added support for cubemap arrays * don't bind unused color buffers * fixed depth promotion to not use stencil * doors * bonfire lit * cubemap array index calculation * final touches
This commit is contained in:
parent
913a46173a
commit
5f4ddc14fc
35 changed files with 495 additions and 283 deletions
|
@ -157,8 +157,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
|
|||
ImageOperands operands;
|
||||
operands.AddOffset(ctx, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||
return ctx.OpBitcast(
|
||||
ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands));
|
||||
const Id texel =
|
||||
texture.is_storage
|
||||
? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands)
|
||||
: ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands);
|
||||
return ctx.OpBitcast(ctx.F32[4], texel);
|
||||
}
|
||||
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {
|
||||
|
|
|
@ -510,7 +510,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
case AmdGpu::ImageType::Color3D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
|
||||
format);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -534,6 +535,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
|
||||
.pointer_type = pointer_type,
|
||||
.image_type = image_type,
|
||||
.is_storage = image_desc.is_storage,
|
||||
});
|
||||
interfaces.push_back(id);
|
||||
++binding;
|
||||
|
|
|
@ -200,6 +200,7 @@ public:
|
|||
Id sampled_type;
|
||||
Id pointer_type;
|
||||
Id image_type;
|
||||
bool is_storage = false;
|
||||
};
|
||||
|
||||
struct BufferDefinition {
|
||||
|
@ -216,8 +217,8 @@ public:
|
|||
u32 binding;
|
||||
Id image_type;
|
||||
Id result_type;
|
||||
bool is_integer;
|
||||
bool is_storage;
|
||||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
};
|
||||
|
||||
u32& binding;
|
||||
|
|
|
@ -1032,6 +1032,7 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) {
|
|||
|
||||
m_instruction.control.mimg = *reinterpret_cast<InstControlMIMG*>(&hexInstruction);
|
||||
m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode);
|
||||
ASSERT(m_instruction.control.mimg.r128 == 0);
|
||||
}
|
||||
|
||||
void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) {
|
||||
|
|
|
@ -71,6 +71,9 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
ir.SetAttribute(attrib, comp, swizzle(i));
|
||||
}
|
||||
}
|
||||
if (IR::IsMrt(attrib)) {
|
||||
info.mrt_mask |= 1u << u8(attrib);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -546,6 +546,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
info.explicit_lod.Assign(explicit_lod);
|
||||
info.has_derivatives.Assign(has_derivatives);
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
|
@ -630,6 +631,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
|||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
// info.explicit_lod.Assign(explicit_lod);
|
||||
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
|
|
|
@ -64,9 +64,10 @@ struct ImageResource {
|
|||
u32 dword_offset;
|
||||
AmdGpu::ImageType type;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_storage;
|
||||
bool is_depth;
|
||||
bool is_storage{};
|
||||
bool is_depth{};
|
||||
bool is_atomic{};
|
||||
bool is_array{};
|
||||
|
||||
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
|
@ -171,6 +172,7 @@ struct Info {
|
|||
bool uses_fp64{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
u8 mrt_mask{0u};
|
||||
|
||||
explicit Info(Stage stage_, ShaderParams params)
|
||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
|
|
|
@ -200,9 +200,10 @@ public:
|
|||
u32 Add(const ImageResource& desc) {
|
||||
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset && desc.type == existing.type &&
|
||||
desc.is_storage == existing.is_storage;
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
})};
|
||||
auto& image = image_resources[index];
|
||||
image.is_storage |= desc.is_storage;
|
||||
return index;
|
||||
}
|
||||
|
||||
|
@ -441,18 +442,29 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
const IR::Value& z, bool is_storage) {
|
||||
const IR::Value& z, bool is_storage, bool is_array) {
|
||||
// When cubemap is written with imageStore it is treated like 2DArray.
|
||||
if (is_storage) {
|
||||
return ir.CompositeConstruct(s, t, z);
|
||||
}
|
||||
|
||||
ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below
|
||||
|
||||
// We need to fix x and y coordinate,
|
||||
// because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
|
||||
// We already force the scale value to be 1.0 when handling v_cubema_f32,
|
||||
// here we subtract 1.5 to recover the original value.
|
||||
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
|
||||
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
|
||||
return ir.CompositeConstruct(x, y, z);
|
||||
if (is_array) {
|
||||
const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
|
||||
const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
|
||||
const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
|
||||
return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
|
||||
ir.ConvertIToF(32, 32, false, slice_id));
|
||||
} else {
|
||||
return ir.CompositeConstruct(x, y, z);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
|
@ -481,14 +493,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_storage = IsImageStorageInstruction(inst);
|
||||
const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
.dword_offset = tsharp.dword_offset,
|
||||
.type = image.GetType(),
|
||||
.type = type,
|
||||
.nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
|
||||
.is_storage = is_storage,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
});
|
||||
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
|
@ -545,7 +559,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
case AmdGpu::ImageType::Color3D: // x, y, z
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube: // x, y, face
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage),
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage,
|
||||
inst_info.is_array),
|
||||
body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
|
|
|
@ -59,6 +59,7 @@ union TextureInstInfo {
|
|||
BitField<5, 1, u32> has_offset;
|
||||
BitField<6, 2, u32> gather_comp;
|
||||
BitField<8, 1, u32> has_derivatives;
|
||||
BitField<9, 1, u32> is_array;
|
||||
};
|
||||
|
||||
union BufferInstInfo {
|
||||
|
|
|
@ -62,7 +62,8 @@ struct StageSpecialization {
|
|||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.GetType();
|
||||
spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
|
||||
: sharp.GetType();
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue