Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-05-18 17:34:52 +00:00
shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)
* shader_recompiler: Add LDEXP
* shader_recompiler: Add most image integer atomic ops
* shader_recompiler: Implement shared memory load/store
* shader_recompiler: More image atomics
* externals: Update sirit
* clang format
* cmake: Add missing files
* shader_recompiler: Fix some atomic bugs
* shader_recompiler: Vs outputs
* shader_recompiler: Shared mem has side-effects, fix format component order
* shader_recompiler: Inline constant buffer impl
* video_core: Fix regressions
* Work
* Fixup a few things
Parent: af3bbc33e9
Commit: 6ceab6dfac
69 changed files with 1597 additions and 310 deletions
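The headline change is support for integer image atomics. As background for the hunks below: a GCN image atomic is a per-texel read-modify-write whose pre-operation value is written back to the source VGPR only when the instruction's glc bit is set. The following is illustrative pseudocode of that semantics, not code from this commit:

// Illustrative semantics of IMAGE_ATOMIC_ADD vdata, vaddr, srsrc glc (pseudocode):
u32 prev = image[coord];      // read the texel addressed by vaddr
image[coord] = prev + vdata;  // atomically apply the operation
if (glc) {
    vdata = prev;             // glc=1: return the pre-op value to the shader
}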
@@ -149,9 +149,15 @@ void CFG::LinkBlocks() {
block.end_class = EndClass::Branch;
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
const auto& prev_inst = inst_list[block.end_index - 1];
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0 &&
prev_inst.control.exp.target != 9) {
block.end_class = EndClass::Kill;
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
if (prev_inst.control.exp.target != 9) {
block.end_class = EndClass::Kill;
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
exec_mask.src[0].field == OperandField::ConstZero) {
block.end_class = EndClass::Kill;
} else {
block.end_class = EndClass::Exit;
}
} else {
block.end_class = EndClass::Exit;
}
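Background for the reworked end-of-block classification above: a GCN pixel shader typically ends with an export whose channel-enable mask is zero followed by S_ENDPGM, and target 9 is the null export. The new logic treats the sequence as a kill when the zero-enable export goes to a non-null target, or when the preceding instruction forces EXEC to zero; otherwise it is an ordinary exit. Roughly, the two endings being distinguished look like this (assembly shown for illustration only):

// Classified as EndClass::Kill (EXEC forced to zero before the null export):
//   s_mov_b64 exec, 0
//   exp null off, off, off, off done vm
//   s_endpgm
// Classified as EndClass::Exit (plain null export before program end):
//   exp null off, off, off, off done vm
//   s_endpgm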
@@ -32,7 +32,7 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions.
**/

std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;

@@ -47,6 +47,8 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length;

if (inst.opcode == Opcode::S_SETPC_B64) {
break;
}

@@ -17,6 +17,6 @@ struct VertexAttribute {
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
};

std::vector<VertexAttribute> ParseFetchShader(const u32* code);
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);

} // namespace Shader::Gcn
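For context: a fetch shader is a small GCN subroutine generated by the game to load vertex attributes, terminated by S_SETPC_B64, which is why the parser stops at that opcode. ParseFetchShader now also reports how many bytes it decoded so callers can dump the exact blob. Typical usage mirrors the EmitFetch change later in this commit:

u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);
// attribs describes the vertex input layout recovered from the fetch shader;
// fetch_size is the number of bytes decoded, usable for dumping the program.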
@@ -3429,48 +3429,48 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
// 17 = IMAGE_ATOMIC_ADD
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 18 = IMAGE_ATOMIC_SUB
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
{},
// 20 = IMAGE_ATOMIC_SMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
ScalarType::Sint32},
// 21 = IMAGE_ATOMIC_UMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 22 = IMAGE_ATOMIC_SMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
ScalarType::Sint32},
// 23 = IMAGE_ATOMIC_UMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 24 = IMAGE_ATOMIC_AND
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 25 = IMAGE_ATOMIC_OR
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 26 = IMAGE_ATOMIC_XOR
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 27 = IMAGE_ATOMIC_INC
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 28 = IMAGE_ATOMIC_DEC
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 29 = IMAGE_ATOMIC_FCMPSWAP
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 30 = IMAGE_ATOMIC_FMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 31 = IMAGE_ATOMIC_FMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 32 = IMAGE_SAMPLE
{InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
@@ -187,7 +187,7 @@ std::string DumpExpr(const Statement* stmt) {
case StatementType::Not:
case StatementType::Or:
case StatementType::Variable:
throw LogicError("Statement can't be printed");
UNREACHABLE_MSG("Statement can't be printed");
}
}
return ret;

@@ -335,7 +335,7 @@ private:
}
// Expensive operation:
if (!AreSiblings(goto_stmt, label_stmt)) {
throw LogicError("Goto is not a sibling with the label");
UNREACHABLE_MSG("Goto is not a sibling with the label");
}
// goto_stmt and label_stmt are guaranteed to be siblings, eliminate
if (std::next(goto_stmt) == label_stmt) {

@@ -451,7 +451,7 @@ private:
case StatementType::Loop:
return MoveOutwardLoop(goto_stmt);
default:
throw LogicError("Invalid outward movement");
UNREACHABLE_MSG("Invalid outward movement");
}
}

@@ -486,7 +486,7 @@ private:
case StatementType::Loop:
break;
default:
throw LogicError("Invalid inward movement");
UNREACHABLE_MSG("Invalid inward movement");
}
Tree& nested_tree{label_nested_stmt->children};
Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};

@@ -633,7 +633,8 @@ private:
if (!stmt.block->is_dummy) {
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, inst_list.subspan(start, size), info);
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
info);
}
break;
}
@@ -22,16 +22,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) {
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0));
ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1));
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
} else if (bit_size == 64) {
const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr));
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
} else {
const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr);
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
ir.SetVectorReg(dst_reg, data);
}
}

@@ -41,17 +43,26 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
const IR::VectorReg data0{inst.src[1].code};
const IR::VectorReg data1{inst.src[2].code};
if (is_pair) {
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else if (bit_size == 64) {
const IR::U64 data = ir.PackUint2x32(
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)));
const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, data, addr);
} else {
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
}
}

void Translator::S_BARRIER() {
ir.Barrier();
}

void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
UNREACHABLE();
}

} // namespace Shader::Gcn

@@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
ir.SetScc(ir.Imm1(false));
}

void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
SetDst(inst.dst[0], ir.Imm32(pc));
}

void Translator::S_ADDC_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
}

} // namespace Shader::Gcn
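The S_GETPC_B64 handling above is intentionally simplified: games commonly embed constants directly after the shader code and build a pointer to them with the s_getpc_b64 / s_add_u32 / s_addc_u32 idiom. Returning the program counter as a plain immediate lets a later resource-tracking pass spot that pattern and promote it to an inline constant buffer. A hedged illustration of the idiom being matched (register numbers and the folding step are descriptive assumptions, not the actual implementation):

// GCN idiom for addressing data embedded after the code:
//   s_getpc_b64  s[4:5]          ; s[4:5] = address of the next instruction
//   s_add_u32    s4, s4, offset  ; add the literal distance to the data
//   s_addc_u32   s5, s5, 0       ; propagate the carry into the high half
// With S_GETPC_B64 translated to Imm32(pc), a tracking pass can fold the adds
// into pc + offset and treat the result as an inline constant buffer base.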
@@ -5,20 +5,29 @@

namespace Shader::Gcn {

static constexpr u32 SQ_SRC_LITERAL = 0xFF;

void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
const u32 dword_offset = [&] -> u32 {
if (smrd.imm) {
return smrd.offset;
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
}
UNREACHABLE();
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
}
}

void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 {

@@ -30,7 +39,9 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::Value vsharp =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
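Two details worth noting in the scalar-load changes above: a buffer resource (V#) is a 128-bit descriptor spread across four consecutive SGPRs, which is why the handle is now assembled from four scalar reads instead of one; and when the SMRD offset comes from a register it is a byte offset, hence the shift right by two to convert it into the dword index the loads use. A small worked example of that conversion (values are illustrative):

// Register-sourced offset: the SGPR holds a byte offset, loads index in dwords.
//   sgpr_offset = 0x40 bytes
//   dword_index = 0x40 >> 2 = 0x10   // matches ShiftRightLogical(offset, Imm32(2))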
@@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"

@@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));

// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);

if (Config::dumpShaders()) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
}

for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};

@@ -224,9 +240,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
attrib.instance_data);
}

const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.fmt = buffer.GetNumberFmt(),
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,

@@ -236,12 +252,13 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
}

void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
}
Translator translator{block, info};
for (const auto& inst : inst_list) {
block_base += inst.length;
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
translator.S_MOVK(inst);
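Translate now receives the block's starting byte offset and advances it by each instruction's length before dispatching, so by the time S_GETPC_B64 is handled the accumulator already holds the address of the following instruction, matching the hardware definition of that opcode. In outline:

// block_base starts at the block's byte offset within the program.
for (const auto& inst : inst_list) {
    block_base += inst.length;   // now equals the PC of the *next* instruction
    // ...
    // case Opcode::S_GETPC_B64: translator.S_GETPC_B64(block_base, inst);
}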
@@ -345,6 +362,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_BFREV_B32:
translator.V_BFREV_B32(inst);
break;
case Opcode::V_LDEXP_F32:
translator.V_LDEXP_F32(inst);
break;
case Opcode::V_FRACT_F32:
translator.V_FRACT_F32(inst);
break;

@@ -374,8 +394,40 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
break;
case Opcode::IMAGE_ATOMIC_AND:
translator.IMAGE_ATOMIC(AtomicOp::And, inst);
break;
case Opcode::IMAGE_ATOMIC_OR:
translator.IMAGE_ATOMIC(AtomicOp::Or, inst);
break;
case Opcode::IMAGE_ATOMIC_XOR:
translator.IMAGE_ATOMIC(AtomicOp::Xor, inst);
break;
case Opcode::IMAGE_ATOMIC_UMAX:
translator.IMAGE_ATOMIC(AtomicOp::Umax, inst);
break;
case Opcode::IMAGE_ATOMIC_SMAX:
translator.IMAGE_ATOMIC(AtomicOp::Smax, inst);
break;
case Opcode::IMAGE_ATOMIC_UMIN:
translator.IMAGE_ATOMIC(AtomicOp::Umin, inst);
break;
case Opcode::IMAGE_ATOMIC_SMIN:
translator.IMAGE_ATOMIC(AtomicOp::Smin, inst);
break;
case Opcode::IMAGE_ATOMIC_INC:
translator.IMAGE_ATOMIC(AtomicOp::Inc, inst);
break;
case Opcode::IMAGE_ATOMIC_DEC:
translator.IMAGE_ATOMIC(AtomicOp::Dec, inst);
break;
case Opcode::IMAGE_GET_LOD:
translator.IMAGE_GET_LOD(inst);
break;

@@ -457,9 +509,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NGT_F32:
translator.V_CMP_F32(ConditionOp::LE, false, inst);
break;
case Opcode::V_CMP_NGE_F32:
translator.V_CMP_F32(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LT_U32:
translator.S_CMP(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LE_U32:
translator.S_CMP(ConditionOp::LE, false, inst);
break;
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;

@@ -487,6 +545,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, inst);
break;

@@ -581,6 +645,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CVT_I32_F32:
translator.V_CVT_I32_F32(inst);
break;
case Opcode::V_CVT_FLR_I32_F32:
translator.V_CVT_FLR_I32_F32(inst);
break;
case Opcode::V_SUBREV_F32:
translator.V_SUBREV_F32(inst);
break;

@@ -715,6 +782,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_MAD_I32_I24(inst);
break;
case Opcode::V_MUL_I32_I24:
case Opcode::V_MUL_U32_U24:
translator.V_MUL_I32_I24(inst);
break;
case Opcode::V_SUB_I32:

@@ -771,6 +839,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NE_U64:
translator.V_CMP_NE_U64(inst);
break;
case Opcode::V_CMP_CLASS_F32:
translator.V_CMP_CLASS_F32(inst);
break;
case Opcode::V_TRUNC_F32:
translator.V_TRUNC_F32(inst);
break;

@@ -786,7 +857,11 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ADD_U32:
translator.S_ADD_U32(inst);
break;
case Opcode::S_ADDC_U32:
translator.S_ADDC_U32(inst);
break;
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
translator.S_SUB_U32(inst);
break;
// TODO: Separate implementation for legacy variants.

@@ -809,9 +884,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_GET_RESINFO:
translator.IMAGE_GET_RESINFO(inst);
break;
case Opcode::S_BARRIER:
translator.S_BARRIER();
break;
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break;
case Opcode::DS_READ_B32:
translator.DS_READ(32, false, false, inst);
break;
case Opcode::DS_READ2_B32:
translator.DS_READ(32, false, true, inst);
break;
case Opcode::DS_WRITE_B32:
translator.DS_WRITE(32, false, false, inst);
break;
case Opcode::DS_WRITE2_B32:
translator.DS_WRITE(32, false, true, inst);
break;
case Opcode::V_READFIRSTLANE_B32:
translator.V_READFIRSTLANE_B32(inst);
break;
case Opcode::S_GETPC_B64:
translator.S_GETPC_B64(block_base, inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
@@ -26,6 +26,25 @@ enum class ConditionOp : u32 {
TRU,
};

enum class AtomicOp : u32 {
Swap,
CmpSwap,
Add,
Sub,
Smin,
Umin,
Smax,
Umax,
And,
Or,
Xor,
Inc,
Dec,
FCmpSwap,
Fmin,
Fmax,
};

enum class NegateMode : u32 {
None,
Src1,

@@ -61,6 +80,8 @@ public:
void S_BREV_B32(const GcnInst& inst);
void S_ADD_U32(const GcnInst& inst);
void S_SUB_U32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_ADDC_U32(const GcnInst& inst);

// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);

@@ -133,6 +154,9 @@ public:
void V_NOT_B32(const GcnInst& inst);
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
void V_BFREV_B32(const GcnInst& inst);
void V_LDEXP_F32(const GcnInst& inst);
void V_CVT_FLR_I32_F32(const GcnInst& inst);
void V_CMP_CLASS_F32(const GcnInst& inst);

// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

@@ -145,6 +169,8 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void S_BARRIER();

// MIMG
void IMAGE_GET_RESINFO(const GcnInst& inst);

@@ -153,6 +179,7 @@ public:
void IMAGE_STORE(const GcnInst& inst);
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
void IMAGE_GET_LOD(const GcnInst& inst);
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);

// Export
void EXP(const GcnInst& inst);

@@ -167,6 +194,6 @@ private:
static std::array<bool, IR::NumScalarRegs> exec_contexts;
};

void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);

} // namespace Shader::Gcn
@@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {

void Translator::V_CVT_F32_F16(const GcnInst& inst) {
const IR::U32 src0 = GetSrc(inst.src[0]);
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
const IR::U16 src0l = ir.UConvert(16, src0);
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
}

void Translator::V_MUL_F32(const GcnInst& inst) {
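The V_CVT_F32_F16 fix above changes the meaning of the conversion: the old lowering converted the source as an unsigned integer, whereas the instruction actually reinterprets the low 16 bits as an IEEE half and widens it. A worked example of the difference:

// src0 = 0x00003C00 (the low 16 bits are the half-precision encoding of 1.0)
//   old: ConvertUToF(32, 16, src0)               -> 15360.0f (treats the bits as an integer)
//   new: BitCast<F16>(UConvert(16, src0)), widen -> 1.0f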
@@ -50,11 +51,14 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
};
const bool has_flt_source =
is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
src1 = ir.BitCast<IR::F32, IR::U32>(src1);
}
if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
src0 = ir.BitCast<IR::F32, IR::U32>(src0);
}
const IR::Value result = ir.Select(flag, src1, src0);
ir.SetVectorReg(dst_reg, IR::U32F32{result});
}

@@ -502,4 +506,19 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(src0));
}

void Translator::V_LDEXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}

void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
}

void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
UNREACHABLE();
}

} // namespace Shader::Gcn
@@ -212,10 +212,15 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));

ASSERT(mimg.dmask == 0xF);
const IR::Value value = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
comps.push_back(ir.Imm32(0.f));
continue;
}
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
}
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
ir.ImageWrite(handle, body, value, {});
}
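The IMAGE_STORE change above replaces the dmask == 0xF assertion with proper handling of partial write masks: the source registers are packed, holding only the enabled components in order, while disabled components are padded with zero. For example:

// dmask = 0b0101 (x and z enabled), data starting at data_reg:
//   comps[0] = data_reg       // x: first enabled component
//   comps[1] = 0.0f           // y: disabled, padded with zero
//   comps[2] = data_reg + 1   // z: second enabled component
//   comps[3] = 0.0f           // w: disabled, padded with zero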
@@ -245,7 +250,10 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
}

const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value});

@@ -304,7 +312,10 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break;
}
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, info);
}

void Translator::IMAGE_GET_LOD(const GcnInst& inst) {

@@ -322,4 +333,48 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}

void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg val_reg{inst.dst[0].code};
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
const IR::Value prev = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {});
case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {});
case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {});
case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {});
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {});
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {});
case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {});
case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {});
case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {});
case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {});
default:
UNREACHABLE();
}
}();
if (mimg.glc) {
ir.SetVectorReg(val_reg, IR::U32{prev});
}
}

} // namespace Shader::Gcn