Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-05-18 01:14:56 +00:00
video_core: Account for runtime state changes when compiling shaders (#575)
* video_core: Compile shader permutations
* spirv: Only specify storage image format for atomics
* ir: Avoid cube coord patching for storage image
* spirv: Fix default attributes
* data_share: Add more instructions
* video_core: Query storage flag with runtime state
* kernel: Use std::list for semaphore
* video_core: Use texture buffers for untyped format load/store
* buffer_cache: Limit view usage
* vk_pipeline_cache: Fix invalid iterator
* image_view: Reduce log spam when alpha=1 in storage swizzle
* video_core: More features and proper spirv feature detection
* video_core: Attempt no2 for specialization
* spirv: Remove conflict
* vk_shader_cache: Small cleanup
Parent: 790d19e59b
Commit: 66e96dd944

43 changed files with 1058 additions and 976 deletions
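In practice, accounting for runtime state means one GCN shader can be compiled into several SPIR-V permutations, with the cache keyed on the state bits that change code generation (for instance, which buffer sharps must be bound as storage buffers). A minimal sketch of the idea; every name below is hypothetical rather than an actual vk_pipeline_cache type:

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>

    // Hypothetical permutation key: shader hash plus the runtime bits that
    // alter the generated SPIR-V (e.g. per-buffer "bind as storage" flags).
    struct SpecKey {
        std::uint64_t shader_hash;
        std::uint32_t storage_buffer_mask;
        bool operator==(const SpecKey&) const = default;
    };

    struct SpecKeyHash {
        std::size_t operator()(const SpecKey& k) const {
            return static_cast<std::size_t>(k.shader_hash ^ (k.shader_hash >> 17) ^
                                            k.storage_buffer_mask);
        }
    };

    struct CompiledModule { /* SPIR-V words, module handle, ... */ };

    // One module per (shader, runtime-state) permutation; the bind-time lookup
    // recompiles only when this exact combination has not been seen before.
    std::unordered_map<SpecKey, CompiledModule, SpecKeyHash> module_cache;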
@@ -18,25 +18,31 @@ void Translator::EmitDataShare(const GcnInst& inst) {
     case Opcode::DS_READ2_B64:
         return DS_READ(64, false, true, inst);
     case Opcode::DS_WRITE_B32:
-        return DS_WRITE(32, false, false, inst);
+        return DS_WRITE(32, false, false, false, inst);
+    case Opcode::DS_WRITE2ST64_B32:
+        return DS_WRITE(32, false, true, true, inst);
     case Opcode::DS_WRITE_B64:
-        return DS_WRITE(64, false, false, inst);
+        return DS_WRITE(64, false, false, false, inst);
     case Opcode::DS_WRITE2_B32:
-        return DS_WRITE(32, false, true, inst);
+        return DS_WRITE(32, false, true, false, inst);
     case Opcode::DS_WRITE2_B64:
-        return DS_WRITE(64, false, true, inst);
+        return DS_WRITE(64, false, true, false, inst);
     case Opcode::DS_ADD_U32:
         return DS_ADD_U32(inst, false);
     case Opcode::DS_MIN_U32:
-        return DS_MIN_U32(inst, false);
+        return DS_MIN_U32(inst, false, false);
+    case Opcode::DS_MIN_I32:
+        return DS_MIN_U32(inst, true, false);
     case Opcode::DS_MAX_U32:
-        return DS_MAX_U32(inst, false);
+        return DS_MAX_U32(inst, false, false);
+    case Opcode::DS_MAX_I32:
+        return DS_MAX_U32(inst, true, false);
     case Opcode::DS_ADD_RTN_U32:
         return DS_ADD_U32(inst, true);
     case Opcode::DS_MIN_RTN_U32:
-        return DS_MIN_U32(inst, true);
+        return DS_MIN_U32(inst, false, true);
     case Opcode::DS_MAX_RTN_U32:
-        return DS_MAX_U32(inst, true);
+        return DS_MAX_U32(inst, false, true);
     default:
         LogMissingOpcode(inst);
     }
@@ -89,12 +95,13 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
     }
 }
 
-void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) {
+void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
+                          const GcnInst& inst) {
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
     const IR::VectorReg data0{inst.src[1].code};
     const IR::VectorReg data1{inst.src[2].code};
     if (is_pair) {
-        const u32 adj = bit_size == 32 ? 4 : 8;
+        const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
         if (bit_size == 32) {
             ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
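The multiplier change above is the whole point of the new flag: DS_WRITE2ST64 variants scale offset0/offset1 by a 64-element stride, so for 32-bit pair writes each offset step addresses 4 * 64 = 256 bytes of LDS instead of 4. A self-contained sketch of the address math (hypothetical helper, not translator code):

    #include <cstdint>

    // Byte offset contributed by offset0/offset1 in DS_WRITE2* pair writes.
    std::uint32_t PairWriteOffset(std::uint32_t offset, int bit_size, bool stride64) {
        const std::uint32_t adj = (bit_size == 32 ? 4u : 8u) * (stride64 ? 64u : 1u);
        return offset * adj; // DS_WRITE2ST64_B32 with offset0 = 1 -> 256 bytes
    }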
@@ -133,23 +140,23 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
     }
 }
 
-void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
+void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
     const IR::U32 addr{GetSrc(inst.src[0])};
     const IR::U32 data{GetSrc(inst.src[1])};
     const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
+    const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
     if (rtn) {
         SetDst(inst.dst[0], IR::U32{original_val});
     }
 }
 
-void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
+void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
     const IR::U32 addr{GetSrc(inst.src[0])};
     const IR::U32 data{GetSrc(inst.src[1])};
     const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
+    const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
     if (rtn) {
         SetDst(inst.dst[0], IR::U32{original_val});
     }
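DS_MIN_I32 and DS_MAX_I32 now reuse these helpers with is_signed = true, so the only behavioral difference is whether the 32-bit payload is compared as signed or unsigned before the swap. A CPU-side analogue of the min case, illustrative only (the emulator lowers this through SharedAtomicIMin in its IR):

    #include <atomic>
    #include <cstdint>

    // Same bit pattern, different ordering: 0xFFFFFFFF is the largest unsigned
    // value but -1 when signed. RTN variants return the pre-op value.
    std::uint32_t AtomicMin32(std::atomic<std::uint32_t>& slot, std::uint32_t data,
                              bool is_signed) {
        std::uint32_t old = slot.load();
        while (true) {
            const bool keep = is_signed ? static_cast<std::int32_t>(old) <=
                                              static_cast<std::int32_t>(data)
                                        : old <= data;
            if (slot.compare_exchange_weak(old, keep ? old : data)) {
                return old;
            }
        }
    }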
@@ -1,14 +1,12 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include "common/logging/log.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 
 namespace Shader::Gcn {
 
 void Translator::EmitExport(const GcnInst& inst) {
     if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
-        LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
         ir.Discard(ir.LogicalNot(ir.GetExec()));
     }
@@ -354,7 +354,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
         if (!std::filesystem::exists(dump_dir)) {
             std::filesystem::create_directories(dump_dir);
         }
-        const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
+        const auto filename = fmt::format("vs_{:#018x}_fetch.bin", info.pgm_hash);
         const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
         file.WriteRaw<u8>(code, fetch_size);
     }
@@ -399,9 +399,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
         info.buffers.push_back({
             .sgpr_base = attrib.sgpr_base,
             .dword_offset = attrib.dword_offset,
             .length = buffer.num_records,
             .used_types = IR::Type::F32,
-            .is_storage = true, // we may not fit into UBO with large meshes
             .is_instance_data = true,
         });
         instance_buf_handle = s32(info.buffers.size() - 1);
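This removes the hard-coded is_storage hint (with its "may not fit into UBO" caveat) from the instance-data path; per the commit message, the storage flag is now queried from runtime state, that is, decided once the real binding and its size are known. A rough sketch of that kind of bind-time decision, with hypothetical names:

    #include <cstddef>

    // Decide the descriptor class once the actual bound range is known.
    // Large instance/vertex data can exceed the UBO limit, and shader
    // writes always force a storage buffer.
    bool ShouldBindAsStorage(std::size_t range_bytes, std::size_t max_ubo_bytes,
                             bool written_by_shader) {
        return written_by_shader || range_bytes > max_ubo_bytes;
    }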
@@ -191,8 +191,10 @@ public:
     void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
 
     // Vector Memory
-    void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
-    void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
+    void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst);
+    void BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst);
+    void BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst);
+    void BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst);
     void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
 
     // Vector interpolation
@@ -202,10 +204,10 @@ public:
     // Data share
     void DS_SWIZZLE_B32(const GcnInst& inst);
     void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
-    void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
+    void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
     void DS_ADD_U32(const GcnInst& inst, bool rtn);
-    void DS_MIN_U32(const GcnInst& inst, bool rtn);
-    void DS_MAX_U32(const GcnInst& inst, bool rtn);
+    void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
+    void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
     void V_READFIRSTLANE_B32(const GcnInst& inst);
     void V_READLANE_B32(const GcnInst& inst);
     void V_WRITELANE_B32(const GcnInst& inst);
@@ -415,14 +415,20 @@ void Translator::V_ADDC_U32(const GcnInst& inst) {
     const auto src0 = GetSrc<IR::U32>(inst.src[0]);
     const auto src1 = GetSrc<IR::U32>(inst.src[1]);
 
-    IR::U32 scarry;
+    IR::U1 carry;
     if (inst.src_count == 3) { // VOP3
-        IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
-        scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
+        if (inst.src[2].field == OperandField::VccLo) {
+            carry = ir.GetVcc();
+        } else if (inst.src[2].field == OperandField::ScalarGPR) {
+            carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
+        } else {
+            UNREACHABLE();
+        }
     } else { // VOP2
-        scarry = ir.GetVccLo();
+        carry = ir.GetVcc();
     }
 
+    const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
     const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
 
     const IR::VectorReg dst_reg{inst.dst[0].code};
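The rewrite separates the carry-in's source (VCC for VOP2, VCC or an arbitrary SGPR thread bit for VOP3) from its use, widening the IR::U1 to 0/1 only once before the add. The arithmetic being modeled is a plain 32-bit add-with-carry (illustrative sketch, not emulator code):

    #include <cstdint>

    // result = src0 + src1 + carry_in, wrapping modulo 2^32 like the GPU add.
    std::uint32_t AddCarry32(std::uint32_t src0, std::uint32_t src1, bool carry_in) {
        return src0 + src1 + (carry_in ? 1u : 0u);
    }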
@@ -56,57 +56,57 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
 
     // Buffer load operations
     case Opcode::TBUFFER_LOAD_FORMAT_X:
-        return BUFFER_LOAD_FORMAT(1, true, true, inst);
+        return BUFFER_LOAD(1, true, inst);
     case Opcode::TBUFFER_LOAD_FORMAT_XY:
-        return BUFFER_LOAD_FORMAT(2, true, true, inst);
+        return BUFFER_LOAD(2, true, inst);
     case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
-        return BUFFER_LOAD_FORMAT(3, true, true, inst);
+        return BUFFER_LOAD(3, true, inst);
     case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
-        return BUFFER_LOAD_FORMAT(4, true, true, inst);
+        return BUFFER_LOAD(4, true, inst);
 
     case Opcode::BUFFER_LOAD_FORMAT_X:
-        return BUFFER_LOAD_FORMAT(1, false, true, inst);
+        return BUFFER_LOAD_FORMAT(1, inst);
     case Opcode::BUFFER_LOAD_FORMAT_XY:
-        return BUFFER_LOAD_FORMAT(2, false, true, inst);
+        return BUFFER_LOAD_FORMAT(2, inst);
     case Opcode::BUFFER_LOAD_FORMAT_XYZ:
-        return BUFFER_LOAD_FORMAT(3, false, true, inst);
+        return BUFFER_LOAD_FORMAT(3, inst);
     case Opcode::BUFFER_LOAD_FORMAT_XYZW:
-        return BUFFER_LOAD_FORMAT(4, false, true, inst);
+        return BUFFER_LOAD_FORMAT(4, inst);
 
     case Opcode::BUFFER_LOAD_DWORD:
-        return BUFFER_LOAD_FORMAT(1, false, false, inst);
+        return BUFFER_LOAD(1, false, inst);
     case Opcode::BUFFER_LOAD_DWORDX2:
-        return BUFFER_LOAD_FORMAT(2, false, false, inst);
+        return BUFFER_LOAD(2, false, inst);
     case Opcode::BUFFER_LOAD_DWORDX3:
-        return BUFFER_LOAD_FORMAT(3, false, false, inst);
+        return BUFFER_LOAD(3, false, inst);
     case Opcode::BUFFER_LOAD_DWORDX4:
-        return BUFFER_LOAD_FORMAT(4, false, false, inst);
+        return BUFFER_LOAD(4, false, inst);
 
     // Buffer store operations
     case Opcode::BUFFER_STORE_FORMAT_X:
-        return BUFFER_STORE_FORMAT(1, false, true, inst);
+        return BUFFER_STORE_FORMAT(1, inst);
     case Opcode::BUFFER_STORE_FORMAT_XY:
-        return BUFFER_STORE_FORMAT(2, false, true, inst);
+        return BUFFER_STORE_FORMAT(2, inst);
     case Opcode::BUFFER_STORE_FORMAT_XYZ:
-        return BUFFER_STORE_FORMAT(3, false, true, inst);
+        return BUFFER_STORE_FORMAT(3, inst);
     case Opcode::BUFFER_STORE_FORMAT_XYZW:
-        return BUFFER_STORE_FORMAT(4, false, true, inst);
+        return BUFFER_STORE_FORMAT(4, inst);
 
     case Opcode::TBUFFER_STORE_FORMAT_X:
-        return BUFFER_STORE_FORMAT(1, true, true, inst);
+        return BUFFER_STORE(1, true, inst);
     case Opcode::TBUFFER_STORE_FORMAT_XY:
-        return BUFFER_STORE_FORMAT(2, true, true, inst);
+        return BUFFER_STORE(2, true, inst);
     case Opcode::TBUFFER_STORE_FORMAT_XYZ:
-        return BUFFER_STORE_FORMAT(3, true, true, inst);
+        return BUFFER_STORE(3, true, inst);
 
     case Opcode::BUFFER_STORE_DWORD:
-        return BUFFER_STORE_FORMAT(1, false, false, inst);
+        return BUFFER_STORE(1, false, inst);
     case Opcode::BUFFER_STORE_DWORDX2:
-        return BUFFER_STORE_FORMAT(2, false, false, inst);
+        return BUFFER_STORE(2, false, inst);
    case Opcode::BUFFER_STORE_DWORDX3:
-        return BUFFER_STORE_FORMAT(3, false, false, inst);
+        return BUFFER_STORE(3, false, inst);
     case Opcode::BUFFER_STORE_DWORDX4:
-        return BUFFER_STORE_FORMAT(4, false, false, inst);
+        return BUFFER_STORE(4, false, inst);
 
     // Buffer atomic operations
     case Opcode::BUFFER_ATOMIC_ADD:
@@ -349,8 +349,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
     ir.ImageWrite(handle, body, value, {});
 }
 
-void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format,
-                                    const GcnInst& inst) {
+void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
     const auto& mtbuf = inst.control.mtbuf;
     const IR::VectorReg vaddr{inst.src[0].code};
     const IR::ScalarReg sharp{inst.src[2].code * 4};
@@ -370,22 +369,19 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
     info.index_enable.Assign(mtbuf.idxen);
     info.offset_enable.Assign(mtbuf.offen);
     info.inst_offset.Assign(mtbuf.offset);
     info.is_typed.Assign(is_typed);
     if (is_typed) {
-        info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
-        info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
-        ASSERT(info.nfmt == AmdGpu::NumberFormat::Float &&
-               (info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
-                info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
-                info.dmft == AmdGpu::DataFormat::Format32_32 ||
-                info.dmft == AmdGpu::DataFormat::Format32));
+        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
+        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
+        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
+               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
+                dmft == AmdGpu::DataFormat::Format32_32_32 ||
+                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
     }
 
     const IR::Value handle =
         ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                               ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    const IR::Value value = is_format ? ir.LoadBufferFormat(num_dwords, handle, address, info)
-                                      : ir.LoadBuffer(num_dwords, handle, address, info);
+    const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
     const IR::VectorReg dst_reg{inst.src[1].code};
     if (num_dwords == 1) {
         ir.SetVectorReg(dst_reg, IR::F32{value});
@@ -396,8 +392,34 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
     }
 }
 
-void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format,
-                                     const GcnInst& inst) {
+void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
+    const auto& mubuf = inst.control.mubuf;
+    const IR::VectorReg vaddr{inst.src[0].code};
+    const IR::ScalarReg sharp{inst.src[2].code * 4};
+    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
+    const IR::Value address = [&] -> IR::Value {
+        if (mubuf.idxen) {
+            return ir.GetVectorReg(vaddr);
+        }
+        return {};
+    }();
+    const IR::Value soffset{GetSrc(inst.src[3])};
+    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
+
+    IR::BufferInstInfo info{};
+    info.index_enable.Assign(mubuf.idxen);
+
+    const IR::Value handle =
+        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
+                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
+    const IR::Value value = ir.LoadBufferFormat(handle, address, info);
+    const IR::VectorReg dst_reg{inst.src[1].code};
+    for (u32 i = 0; i < num_dwords; i++) {
+        ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
+    }
+}
+
+void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
     const auto& mtbuf = inst.control.mtbuf;
     const IR::VectorReg vaddr{inst.src[0].code};
     const IR::ScalarReg sharp{inst.src[2].code * 4};
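The new BUFFER_LOAD_FORMAT/BUFFER_STORE_FORMAT paths defer every dfmt/nfmt conversion to LoadBufferFormat/StoreBufferFormat, which the commit message says are backed by texture buffers. On the Vulkan side that amounts to viewing the buffer memory through a typed texel-buffer view, so the image unit performs the conversion instead of emitted unpack code. A hedged sketch; the device/buffer/format parameters are assumptions, not the buffer cache's actual API:

    #include <vulkan/vulkan.h>

    // Expose a buffer range as a formatted texel buffer; imageLoad/imageStore
    // through this view applies the format conversion in hardware.
    VkBufferView MakeTexelBufferView(VkDevice device, VkBuffer buffer, VkFormat format) {
        const VkBufferViewCreateInfo view_ci{
            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
            .buffer = buffer,
            .format = format, // picked from the sharp's dfmt/nfmt at bind time
            .offset = 0,
            .range = VK_WHOLE_SIZE,
        };
        VkBufferView view{};
        vkCreateBufferView(device, &view_ci, nullptr, &view);
        return view;
    }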
@@ -417,45 +439,76 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form
     info.index_enable.Assign(mtbuf.idxen);
     info.offset_enable.Assign(mtbuf.offen);
     info.inst_offset.Assign(mtbuf.offset);
     info.is_typed.Assign(is_typed);
     if (is_typed) {
-        info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
-        info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
+        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
+        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
+        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
+               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
+                dmft == AmdGpu::DataFormat::Format32_32_32 ||
+                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
     }
 
     IR::Value value{};
     const IR::VectorReg src_reg{inst.src[1].code};
     switch (num_dwords) {
     case 1:
-        value = ir.GetVectorReg<Shader::IR::F32>(src_reg);
+        value = ir.GetVectorReg<IR::F32>(src_reg);
         break;
     case 2:
-        value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
-                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 1));
+        value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
+                                      ir.GetVectorReg<IR::F32>(src_reg + 1));
         break;
     case 3:
-        value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
-                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 1),
-                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 2));
+        value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
+                                      ir.GetVectorReg<IR::F32>(src_reg + 1),
+                                      ir.GetVectorReg<IR::F32>(src_reg + 2));
         break;
     case 4:
-        value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
-                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 1),
-                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 2),
-                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
+        value = ir.CompositeConstruct(
+            ir.GetVectorReg<IR::F32>(src_reg), ir.GetVectorReg<IR::F32>(src_reg + 1),
+            ir.GetVectorReg<IR::F32>(src_reg + 2), ir.GetVectorReg<IR::F32>(src_reg + 3));
         break;
     }
     const IR::Value handle =
         ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                               ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    if (is_format) {
-        ir.StoreBufferFormat(num_dwords, handle, address, value, info);
-    } else {
-        ir.StoreBuffer(num_dwords, handle, address, value, info);
-    }
+    ir.StoreBuffer(num_dwords, handle, address, value, info);
 }
 
+void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
+    const auto& mubuf = inst.control.mubuf;
+    const IR::VectorReg vaddr{inst.src[0].code};
+    const IR::ScalarReg sharp{inst.src[2].code * 4};
+    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
+    const IR::Value address = [&] -> IR::Value {
+        if (mubuf.idxen) {
+            return ir.GetVectorReg(vaddr);
+        }
+        return {};
+    }();
+    const IR::Value soffset{GetSrc(inst.src[3])};
+    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
+
+    IR::BufferInstInfo info{};
+    info.index_enable.Assign(mubuf.idxen);
+
+    const IR::VectorReg src_reg{inst.src[1].code};
+
+    std::array<IR::Value, 4> comps{};
+    for (u32 i = 0; i < num_dwords; i++) {
+        comps[i] = ir.GetVectorReg<IR::F32>(src_reg + i);
+    }
+    for (u32 i = num_dwords; i < 4; i++) {
+        comps[i] = ir.Imm32(0.f);
+    }
+
+    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
+    const IR::Value handle =
+        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
+                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
+    ir.StoreBufferFormat(handle, address, value, info);
+}
+
 // TODO: U64
 void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     const auto& mubuf = inst.control.mubuf;
     const IR::VectorReg vaddr{inst.src[0].code};