shader_recompiler: Improvements to array and cube handling. (#2083)

* shader_recompiler: Account for instruction array flag in image type.

* shader_recompiler: Check da flag for all mimg instructions.

* shader_recompiler: Convert cube images into 2D arrays.

* shader_recompiler: Move image resource functions into sharp type.

* shader_recompiler: Use native AMD cube instructions when possible.

* specialization: Fix buffer storage mistake.
This commit is contained in:
squidbus 2025-01-10 00:48:12 -08:00 committed by GitHub
parent 93402620de
commit 725814ce01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 217 additions and 144 deletions

View file

@ -172,20 +172,19 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
const auto type = sharp.GetBoundType();
const Id zero = ctx.u32_zero_value;
const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
const bool uses_lod{texture.bound_type != AmdGpu::ImageType::Color2DMsaa &&
!texture.is_storage};
const auto query{[&](Id type) {
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
: ctx.OpImageQuerySize(type, image);
}};
switch (type) {
switch (texture.bound_type) {
case AmdGpu::ImageType::Color1D:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
case AmdGpu::ImageType::Color1DArray:
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Cube:
case AmdGpu::ImageType::Color2DMsaa:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
case AmdGpu::ImageType::Color2DArray:
@ -257,4 +256,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
}
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
if (ctx.profile.supports_native_cube_calc) {
return ctx.OpCubeFaceCoordAMD(ctx.F32[2], cube_coords);
} else {
UNREACHABLE_MSG("SPIR-V Instruction");
}
}
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
if (ctx.profile.supports_native_cube_calc) {
return ctx.OpCubeFaceIndexAMD(ctx.F32[1], cube_coords);
} else {
UNREACHABLE_MSG("SPIR-V Instruction");
}
}
} // namespace Shader::Backend::SPIRV

View file

@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);

View file

@ -773,8 +773,8 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto image = desc.GetSharp(ctx.info);
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
const auto type = image.GetBoundType();
const u32 sampled = desc.IsStorage(image) ? 2 : 1;
const auto type = image.GetBoundType(desc.is_array);
const u32 sampled = desc.is_written ? 2 : 1;
switch (type) {
case AmdGpu::ImageType::Color1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
format);
default:
break;
}
@ -802,7 +799,7 @@ void EmitContext::DefineImagesAndSamplers() {
const auto sharp = image_desc.GetSharp(info);
const auto nfmt = sharp.GetNumberFmt();
const bool is_integer = AmdGpu::IsInteger(nfmt);
const bool is_storage = image_desc.IsStorage(sharp);
const bool is_storage = image_desc.is_written;
const VectorIds& data_types = GetAttributeType(*this, nfmt);
const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)};
@ -817,6 +814,7 @@ void EmitContext::DefineImagesAndSamplers() {
.sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type,
.image_type = image_type,
.bound_type = sharp.GetBoundType(image_desc.is_array),
.is_integer = is_integer,
.is_storage = is_storage,
});

View file

@ -222,6 +222,7 @@ public:
Id sampled_type;
Id pointer_type;
Id image_type;
AmdGpu::ImageType bound_type;
bool is_integer = false;
bool is_storage = false;
};

View file

@ -301,6 +301,9 @@ private:
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
void LogMissingOpcode(const GcnInst& inst);
private:

View file

@ -3,6 +3,7 @@
#include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn {
@ -1042,20 +1043,92 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
V_MAD_I32_I24(inst, false);
}
IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
const IR::F32& x_res, const IR::F32& y_res,
const IR::F32& z_res) {
const auto abs_x = ir.FPAbs(x);
const auto abs_y = ir.FPAbs(y);
const auto abs_z = ir.FPAbs(z);
const auto z_face_cond{
ir.LogicalAnd(ir.FPGreaterThanEqual(abs_z, abs_x), ir.FPGreaterThanEqual(abs_z, abs_y))};
const auto y_face_cond{ir.FPGreaterThanEqual(abs_y, abs_x)};
return IR::F32{ir.Select(z_face_cond, z_res, ir.Select(y_face_cond, y_res, x_res))};
}
void Translator::V_CUBEID_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
IR::F32 result;
if (profile.supports_native_cube_calc) {
result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
} else {
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
}
SetDst(inst.dst[0], result);
}
void Translator::V_CUBESC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
IR::F32 result;
if (profile.supports_native_cube_calc) {
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
result = IR::F32{ir.CompositeExtract(coords, 0)};
} else {
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
}
SetDst(inst.dst[0], result);
}
void Translator::V_CUBETC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
IR::F32 result;
if (profile.supports_native_cube_calc) {
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
result = IR::F32{ir.CompositeExtract(coords, 1)};
} else {
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const IR::F32 x_z_sc{ir.FPNeg(y)};
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
}
SetDst(inst.dst[0], result);
}
void Translator::V_CUBEMA_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.Imm32(1.f));
const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
const auto two{ir.Imm32(4.f)};
const IR::F32 x_major_axis{ir.FPMul(x, two)};
const IR::F32 y_major_axis{ir.FPMul(y, two)};
const IR::F32 z_major_axis{ir.FPMul(z, two)};
const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)};
SetDst(inst.dst[0], result);
}
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {

View file

@ -418,6 +418,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info);
for (u32 i = 0; i < 4; i++) {
@ -442,6 +443,7 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
@ -456,13 +458,18 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
}
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const auto flags = ImageResFlags(inst.control.mimg.dmask);
const bool has_mips = flags.test(ImageResComponent::MipCount);
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
const IR::Value tsharp = ir.GetScalarReg(tsharp_reg);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips));
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info);
if (flags.test(ImageResComponent::Width)) {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)});
@ -484,6 +491,9 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
@ -494,25 +504,25 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {});
return ir.ImageAtomicIAdd(handle, body, value, info);
case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {});
return ir.ImageAtomicIMin(handle, body, value, true, info);
case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {});
return ir.ImageAtomicUMin(handle, body, value, info);
case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {});
return ir.ImageAtomicIMax(handle, body, value, true, info);
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {});
return ir.ImageAtomicUMax(handle, body, value, info);
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {});
return ir.ImageAtomicAnd(handle, body, value, info);
case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {});
return ir.ImageAtomicOr(handle, body, value, info);
case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {});
return ir.ImageAtomicXor(handle, body, value, info);
case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {});
return ir.ImageAtomicInc(handle, body, value, info);
case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {});
return ir.ImageAtomicDec(handle, body, value, info);
default:
UNREACHABLE();
}
@ -643,11 +653,14 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
const IR::Value lod = ir.ImageQueryLod(handle, body, {});
const IR::Value lod = ir.ImageQueryLod(handle, body, info);
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)});
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}

View file

@ -70,14 +70,8 @@ struct ImageResource {
bool is_depth{};
bool is_atomic{};
bool is_array{};
bool is_read{};
bool is_written{};
[[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept {
// Need cube as storage when used with ImageRead.
return is_written || (is_read && image.GetBoundType() == AmdGpu::ImageType::Cube);
}
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;

View file

@ -1732,11 +1732,6 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
}
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips) {
return Inst(Opcode::ImageQueryDimensions, handle, lod, skip_mips);
}
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips, TextureInstInfo info) {
return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips);
@ -1763,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color);
}
[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) {
return Inst(Opcode::CubeFaceCoord, cube_coords);
}
[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) {
return Inst<F32>(Opcode::CubeFaceIndex, cube_coords);
}
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
// Renderdoc will hook in its own implementation of the SPIRV instruction
// Renderdoc accepts format specifiers, e.g. %u, listed here:

View file

@ -324,8 +324,6 @@ public:
const F32& dref, const F32& lod,
const Value& offset, TextureInstInfo info);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
const U1& skip_mips);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
const U1& skip_mips, TextureInstInfo info);
@ -344,6 +342,9 @@ public:
void ImageWrite(const Value& handle, const Value& coords, const U32& lod,
const U32& multisampling, const Value& color, TextureInstInfo info);
[[nodiscard]] Value CubeFaceCoord(const Value& cube_coords);
[[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords);
void EmitVertex();
void EmitPrimitive();

View file

@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32, U32, Opaq
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Cube operations - optional, usable if profile.supports_native_cube_calc
OPCODE(CubeFaceCoord, F32x2, F32x3, )
OPCODE(CubeFaceIndex, F32, F32x3, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(WarpId, U32, )

View file

@ -161,10 +161,9 @@ public:
u32 Add(const ImageResource& desc) {
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
return desc.sharp_idx == existing.sharp_idx;
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array;
})};
auto& image = image_resources[index];
image.is_read |= desc.is_read;
image.is_written |= desc.is_written;
return index;
}
@ -361,7 +360,6 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
image = AmdGpu::Image::Null();
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
// Patch image instruction if image is FMask.
@ -402,7 +400,6 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
.is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
.is_read = is_read,
.is_written = is_written,
});
@ -560,32 +557,6 @@ void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
}
}
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
const IR::Value& z, bool is_written, bool is_array) {
// When cubemap is written with imageStore it is treated like 2DArray.
if (is_written) {
return ir.CompositeConstruct(s, t, z);
}
ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below
// We need to fix x and y coordinate,
// because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
// We already force the scale value to be 1.0 when handling v_cubema_f32,
// here we subtract 1.5 to recover the original value.
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
if (is_array) {
const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
ir.ConvertIToF(32, 32, false, slice_id));
} else {
return ir.CompositeConstruct(x, y, z);
}
}
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
const AmdGpu::Image& image) {
const auto handle = inst.Arg(0);
@ -649,7 +620,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
case AmdGpu::ImageType::Color2DMsaa:
return ir.CompositeConstruct(read(0), read(8));
case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Cube:
return ir.CompositeConstruct(read(0), read(8), read(16));
default:
UNREACHABLE();
@ -675,7 +645,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)),
ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))};
case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Cube:
// (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
addr_reg = addr_reg + 6;
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5),
@ -691,7 +660,8 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
// Query dimensions of image if needed for normalization.
// We can't use the image sharp because it could be bound to a different image later.
const auto dimensions =
unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{};
unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false), inst_info)
: IR::Value{};
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
const auto coord = get_addr_reg(coord_idx);
if (unnormalized) {
@ -724,10 +694,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_coord(addr_reg - 1, 2));
case AmdGpu::ImageType::Cube: // x, y, face
addr_reg = addr_reg + 3;
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
default:
UNREACHABLE();
}
@ -805,10 +771,6 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
[[fallthrough]];
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
body->Arg(3)};
default:
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
}
@ -820,7 +782,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
const auto is_storage = image_res.IsStorage(image);
const auto is_storage = image_res.is_written;
if (inst.GetOpcode() == IR::Opcode::ImageRead) {
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
if (is_storage) {

View file

@ -5,7 +5,7 @@
namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
void Visit(Info& info, const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32:

View file

@ -24,6 +24,7 @@ struct Profile {
bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{};
bool supports_native_cube_calc{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};

View file

@ -113,9 +113,9 @@ struct StageSpecialization {
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetBoundType();
spec.type = sharp.GetBoundType(desc.is_array);
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.is_storage = desc.IsStorage(sharp);
spec.is_storage = desc.is_written;
if (spec.is_storage) {
spec.dst_select = sharp.DstSelect();
}