mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-04 08:06:20 +00:00
video_core: Track renderpass scopes properly
This commit is contained in:
parent
ad10020836
commit
22b930ba5e
36 changed files with 400 additions and 166 deletions
|
@ -200,6 +200,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
||||
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
// if (program.info.stores_frag_depth) {
|
||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
// }
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
void EmitBitCastU16F16(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
||||
|
@ -15,11 +15,11 @@ Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
|||
}
|
||||
|
||||
void EmitBitCastU64F64(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitBitCastF16U16(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
|
||||
|
@ -27,7 +27,7 @@ Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
|
|||
}
|
||||
|
||||
void EmitBitCastF64U64(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitPackUint2x32(EmitContext& ctx, Id value) {
|
||||
|
|
|
@ -115,27 +115,27 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
|
|||
}
|
||||
|
||||
void EmitCompositeConstructF64x2(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitCompositeConstructF64x3(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitCompositeConstructF64x4(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitCompositeExtractF64x2(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitCompositeExtractF64x3(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitCompositeExtractF64x4(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
|
||||
|
|
|
@ -195,10 +195,36 @@ void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||
}
|
||||
|
||||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (info.index_enable && info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (info.index_enable) {
|
||||
for (u32 i = 0; i < 3; i++) {
|
||||
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
|
||||
}
|
||||
return;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (info.index_enable && info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (info.index_enable) {
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||
const Id ptr{
|
||||
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
|
||||
}
|
||||
return;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,18 +1,34 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
struct ImageOperands {
|
||||
void Add(spv::ImageOperandsMask new_mask, Id value) {
|
||||
mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) |
|
||||
static_cast<u32>(new_mask));
|
||||
operands.push_back(value);
|
||||
}
|
||||
|
||||
spv::ImageOperandsMask mask{};
|
||||
boost::container::static_vector<Id, 4> operands;
|
||||
};
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
|
||||
ImageOperands operands;
|
||||
if (Sirit::ValidId(offset)) {
|
||||
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||
}
|
||||
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
|
@ -25,9 +41,13 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
|
|||
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref);
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||
|
@ -42,12 +62,16 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
|||
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref);
|
||||
}
|
||||
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||
|
@ -83,21 +107,21 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
|
|||
case AmdGpu::ImageType::Color3D:
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
|
||||
default:
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
|
||||
|
|
|
@ -338,13 +338,13 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
|
|||
Id offset);
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset);
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||
Id coords, Id dref, Id bias_lc, const IR::Value& offset);
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||
Id bias_lc, Id offset);
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2);
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2, Id dref);
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||
Id ms);
|
||||
|
|
|
@ -11,7 +11,7 @@ Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
|
|||
}
|
||||
|
||||
Id EmitSelectU8(EmitContext&, Id, Id, Id) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
|
||||
|
|
|
@ -11,11 +11,11 @@ Id EmitUndefU1(EmitContext& ctx) {
|
|||
}
|
||||
|
||||
Id EmitUndefU8(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitUndefU16(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitUndefU32(EmitContext& ctx) {
|
||||
|
@ -23,7 +23,7 @@ Id EmitUndefU32(EmitContext& ctx) {
|
|||
}
|
||||
|
||||
Id EmitUndefU64(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -118,6 +118,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
switch (fmt) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32[4];
|
||||
|
@ -137,6 +138,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
switch (fmt) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return {id, input_u32, U32[1], 4};
|
||||
|
@ -253,6 +255,7 @@ void EmitContext::DefineOutputs(const Info& info) {
|
|||
}
|
||||
|
||||
void EmitContext::DefineBuffers(const Info& info) {
|
||||
boost::container::small_vector<Id, 8> type_ids;
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
|
@ -260,13 +263,15 @@ void EmitContext::DefineBuffers(const Info& info) {
|
|||
const u32 num_elements = stride * buffer.num_records;
|
||||
const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))};
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
|
||||
const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||
Name(struct_type, name);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||
Name(struct_type, name);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
}
|
||||
type_ids.push_back(record_array_type);
|
||||
|
||||
const auto storage_class =
|
||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
|
|
|
@ -329,12 +329,16 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
break;
|
||||
case Opcode::IMAGE_SAMPLE_LZ_O:
|
||||
case Opcode::IMAGE_SAMPLE_O:
|
||||
case Opcode::IMAGE_SAMPLE_C:
|
||||
case Opcode::IMAGE_SAMPLE_C_LZ:
|
||||
case Opcode::IMAGE_SAMPLE_LZ:
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
case Opcode::IMAGE_SAMPLE_L:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_GATHER4_C:
|
||||
translator.IMAGE_GATHER(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_STORE:
|
||||
translator.IMAGE_STORE(inst);
|
||||
break;
|
||||
|
@ -450,16 +454,22 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
translator.BUFFER_LOAD_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
|
||||
case Opcode::BUFFER_LOAD_DWORDX3:
|
||||
translator.BUFFER_LOAD_FORMAT(3, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
||||
case Opcode::BUFFER_LOAD_DWORDX4:
|
||||
translator.BUFFER_LOAD_FORMAT(4, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
case Opcode::BUFFER_STORE_DWORD:
|
||||
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_DWORDX3:
|
||||
translator.BUFFER_STORE_FORMAT(3, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||
case Opcode::BUFFER_STORE_DWORDX4:
|
||||
translator.BUFFER_STORE_FORMAT(4, false, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_F32:
|
||||
|
|
|
@ -146,6 +146,7 @@ public:
|
|||
// MIMG
|
||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||
void IMAGE_GATHER(const GcnInst& inst);
|
||||
void IMAGE_STORE(const GcnInst& inst);
|
||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||
|
||||
|
|
|
@ -76,6 +76,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
info.explicit_lod.Assign(explicit_lod);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
|
@ -108,6 +109,74 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
if (mimg.da) {
|
||||
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
|
||||
}
|
||||
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
|
||||
const auto flags = MimgModifierFlags(mimg.mod);
|
||||
|
||||
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
||||
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
||||
// binding index.
|
||||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
|
||||
|
||||
// Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
|
||||
// Set Architecture
|
||||
const IR::Value offset =
|
||||
flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
|
||||
const IR::F32 bias =
|
||||
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||
const IR::F32 dref =
|
||||
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||
|
||||
// Derivatives are tricky because their number depends on the texture type which is located in
|
||||
// T#. We don't have access to T# though until resource tracking pass. For now assume no
|
||||
// derivatives are present, otherwise we don't know where coordinates are placed in the address
|
||||
// stream.
|
||||
ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
|
||||
|
||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
||||
// in coords field of the instruction. Then the resource tracking pass will patch the
|
||||
// IR instruction to fill in lod_clamp field.
|
||||
const IR::Value body = ir.CompositeConstruct(
|
||||
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||
|
||||
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
|
||||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||
info.explicit_lod.Assign(explicit_lod);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
||||
if (!flags.test(MimgModifier::Pcf)) {
|
||||
return ir.ImageGather(handle, body, offset, {}, info);
|
||||
}
|
||||
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
|
||||
}();
|
||||
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
if (((mimg.dmask >> i) & 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
||||
ir.SetVectorReg(dest_reg++, value);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
|
|
|
@ -198,6 +198,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer);
|
||||
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||
|
@ -207,7 +208,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = IsBufferStore(inst),
|
||||
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||
});
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
@ -252,25 +253,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
|||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
std::deque<IR::Inst*> insts{&inst};
|
||||
const auto& pred = [](auto opcode) -> bool {
|
||||
return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData);
|
||||
};
|
||||
|
||||
IR::Inst* producer{};
|
||||
while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
|
||||
for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
|
||||
const auto arg = producer->Arg(arg_idx);
|
||||
if (arg.TryInstRecursive()) {
|
||||
insts.push_back(arg.InstRecursive());
|
||||
}
|
||||
}
|
||||
insts.pop_front();
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
while (producer->GetOpcode() == IR::Opcode::Phi) {
|
||||
producer = producer->Arg(0).InstRecursive();
|
||||
}
|
||||
|
||||
ASSERT(pred(producer->GetOpcode()));
|
||||
ASSERT(producer->GetOpcode() ==
|
||||
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
producer->GetOpcode() == IR::Opcode::GetUserData);
|
||||
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||
return std::make_pair(producer->Arg(0).InstRecursive(),
|
||||
|
@ -335,6 +325,22 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst_info.has_offset) {
|
||||
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
|
||||
const u32 arg_pos = inst_info.is_depth ? 4 : 3;
|
||||
const IR::Value arg = inst.Arg(arg_pos);
|
||||
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
||||
const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); };
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 6, u32> x;
|
||||
BitField<8, 6, u32> y;
|
||||
BitField<16, 6, u32> z;
|
||||
} offset{arg.U32()};
|
||||
const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y));
|
||||
inst.SetArg(arg_pos, value);
|
||||
}
|
||||
|
||||
if (inst_info.has_lod_clamp) {
|
||||
// Final argument contains lod_clamp
|
||||
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
||||
|
|
|
@ -38,6 +38,7 @@ union TextureInstInfo {
|
|||
BitField<2, 1, u32> has_lod_clamp;
|
||||
BitField<3, 1, u32> force_level0;
|
||||
BitField<4, 1, u32> explicit_lod;
|
||||
BitField<5, 1, u32> has_offset;
|
||||
};
|
||||
|
||||
union BufferInstInfo {
|
||||
|
|
|
@ -52,7 +52,7 @@ struct BufferResource {
|
|||
|
||||
auto operator<=>(const BufferResource&) const = default;
|
||||
};
|
||||
using BufferResourceList = boost::container::static_vector<BufferResource, 8>;
|
||||
using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
|
||||
|
||||
struct ImageResource {
|
||||
u32 sgpr_base;
|
||||
|
@ -62,13 +62,13 @@ struct ImageResource {
|
|||
bool is_storage;
|
||||
bool is_depth;
|
||||
};
|
||||
using ImageResourceList = boost::container::static_vector<ImageResource, 8>;
|
||||
using ImageResourceList = boost::container::static_vector<ImageResource, 16>;
|
||||
|
||||
struct SamplerResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
};
|
||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 8>;
|
||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
||||
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue