Implement relocations in live recompiler

This commit is contained in:
Mr-Wiseguy 2024-12-11 21:32:44 -05:00
parent 9fa9adbe3a
commit aa765c435f
4 changed files with 155 additions and 65 deletions

View file

@ -236,14 +236,10 @@ void get_gpr_values(int gpr, sljit_sw& out, sljit_sw& outw) {
}
}
void get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw, bool& needs_relocation) {
void get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw) {
using namespace N64Recomp;
bool relocation_valid = false;
// Relocations are only valid for ImmS16 and ImmU16 operands.
assert(context.reloc_type == RelocType::R_MIPS_NONE ||
(operand == Operand::ImmS16 || operand == Operand::ImmU16));
switch (operand) {
case Operand::Rd:
get_gpr_values(context.rd, out, outw);
@ -312,12 +308,10 @@ void get_operand_values(N64Recomp::Operand operand, const N64Recomp::Instruction
outw = get_fpr_u64_context_offset(context.ft);
break;
case Operand::ImmU16:
relocation_valid = true;
out = SLJIT_IMM;
outw = (sljit_sw)(uint16_t)context.imm16;
break;
case Operand::ImmS16:
relocation_valid = true;
out = SLJIT_IMM;
outw = (sljit_sw)(int16_t)context.imm16;
break;
@ -346,13 +340,6 @@ void get_operand_values(N64Recomp::Operand operand, const N64Recomp::Instruction
outw = 0;
break;
}
if (context.reloc_type != N64Recomp::RelocType::R_MIPS_NONE) {
assert(relocation_valid && "Relocation present but not valid on the given operand!");
needs_relocation = true;
}
else {
needs_relocation = false;
}
}
bool outputs_to_zero(N64Recomp::Operand output, const N64Recomp::InstructionContext& ctx) {
@ -377,25 +364,48 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
bool failed = false;
sljit_sw dst;
sljit_sw dstw;
bool relocation_needed_dst;
sljit_sw src1;
sljit_sw src1w;
bool relocation_needed_src1;
sljit_sw src2;
sljit_sw src2w;
bool relocation_needed_src2;
get_operand_values(op.output, ctx, dst, dstw, relocation_needed_dst);
get_operand_values(op.operands.operands[0], ctx, src1, src1w, relocation_needed_src1);
get_operand_values(op.operands.operands[1], ctx, src2, src2w, relocation_needed_src2);
get_operand_values(op.output, ctx, dst, dstw);
get_operand_values(op.operands.operands[0], ctx, src1, src1w);
get_operand_values(op.operands.operands[1], ctx, src2, src2w);
// Relocations are only valid on the second operand.
assert(!relocation_needed_dst && !relocation_needed_src1);
// TODO perform relocation.
// If a relocation is needed for the second operand, perform the relocation and change src2/src2w to use the relocated value.
if (relocation_needed_src2) {
// If a relocation is present, perform the relocation and change src1/src1w to use the relocated value.
if (ctx.reloc_type != RelocType::R_MIPS_NONE) {
// Only allow LO16 relocations.
assert(ctx.reloc_type == RelocType::R_MIPS_LO16);
// Only allow relocations on immediates.
assert(src2 == SLJIT_IMM);
// Only allow relocations on loads and adds.
switch (op.type) {
case BinaryOpType::LD:
case BinaryOpType::LW:
case BinaryOpType::LWU:
case BinaryOpType::LH:
case BinaryOpType::LHU:
case BinaryOpType::LB:
case BinaryOpType::LBU:
case BinaryOpType::LDL:
case BinaryOpType::LDR:
case BinaryOpType::LWL:
case BinaryOpType::LWR:
case BinaryOpType::Add64:
case BinaryOpType::Add32:
break;
default:
// Relocations aren't allowed on this instruction.
assert(false);
}
// Load the relocated address into temp2.
load_relocated_address(ctx, Registers::arithmetic_temp1);
// Extract the LO16 value from the full address (sign extended lower 16 bits).
sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0);
// Replace the immediate input (src2) with the LO16 value.
src2 = Registers::arithmetic_temp1;
src2w = 0;
}
// TODO validate that the unary ops are valid for the current binary op.
assert(op.operands.operand_operations[0] == UnaryOpType::None ||
@ -750,6 +760,20 @@ int64_t do_floor_l_d(double num) {
return (int64_t)floor(num);
}
void N64Recomp::LiveGenerator::load_relocated_address(const InstructionContext& ctx, int reg) const {
// Get the pointer to the section address.
int32_t* section_addr_ptr = (ctx.reloc_tag_as_reference ? inputs.reference_section_addresses : inputs.local_section_addresses) + ctx.reloc_section_index;
// Load the section's address into R0.
sljit_emit_op1(compiler, SLJIT_MOV_S32, Registers::arithmetic_temp1, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr));
// Don't emit the add if the offset is zero (small optimization).
if (ctx.reloc_target_section_offset != 0) {
// Add the reloc section offset to the section's address and put the result in R0.
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_IMM, ctx.reloc_target_section_offset);
}
}
void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const {
// Skip instructions that output to $zero
if (outputs_to_zero(op.output, ctx)) {
@ -758,20 +782,37 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
sljit_sw dst;
sljit_sw dstw;
bool relocation_needed_dst;
sljit_sw src;
sljit_sw srcw;
bool relocation_needed_src;
get_operand_values(op.output, ctx, dst, dstw, relocation_needed_dst);
get_operand_values(op.input, ctx, src, srcw, relocation_needed_src);
get_operand_values(op.output, ctx, dst, dstw);
get_operand_values(op.input, ctx, src, srcw);
// Relocations aren't valid on the output operand.
assert(!relocation_needed_dst);
// TODO perform relocation.
// If a relocation is needed for the input operand, perform the relocation and change src/srcw to use the relocated value.
if (relocation_needed_src) {
// If a relocation is needed for the input operand, perform the relocation and store the result directly.
if (ctx.reloc_type != RelocType::R_MIPS_NONE) {
// Only allow relocation of lui with an immediate.
if (op.operation != UnaryOpType::Lui || op.input != Operand::ImmU16) {
assert(false);
return;
}
// Only allow HI16 relocs.
if (ctx.reloc_type != RelocType::R_MIPS_HI16) {
assert(false);
return;
}
// Load the relocated address into temp1.
load_relocated_address(ctx, Registers::arithmetic_temp1);
// HI16 reloc on a lui
// The 32-bit address (a) is equal to section address + section offset
// The 16-bit immediate is equal to (a - (int16_t)a) >> 16
// Therefore, the register should be set to (int32_t)(a - (int16_t)a) as the shifts cancel out and the lower 16 bits are zero.
// Extract a sign extended 16-bit value from the lower half of the relocated address and put it in temp2.
sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp2, 0, Registers::arithmetic_temp1, 0);
// Subtract the sign extended 16-bit value from the full address to get the HI16 value and place it in the destination.
sljit_emit_op2(compiler, SLJIT_SUB, dst, dstw, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp2, 0);
return;
}
sljit_s32 jit_op = SLJIT_BREAKPOINT;
@ -986,16 +1027,25 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const InstructionContext& ctx) const {
sljit_sw src;
sljit_sw srcw;
bool relocation_needed_src;
sljit_sw imm = (sljit_sw)(int16_t)ctx.imm16;
get_operand_values(op.value_input, ctx, src, srcw, relocation_needed_src);
get_operand_values(op.value_input, ctx, src, srcw);
// Relocations aren't valid on the input operand.
assert(!relocation_needed_src);
// Only LO16 relocs are valid on stores.
assert(ctx.reloc_type == RelocType::R_MIPS_NONE || ctx.reloc_type == RelocType::R_MIPS_LO16);
if (ctx.reloc_type == RelocType::R_MIPS_LO16) {
// Load the relocated address into temp1.
load_relocated_address(ctx, Registers::arithmetic_temp1);
// Extract the LO16 value from the full address (sign extended lower 16 bits).
sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0);
// Add the base register (rs) to the LO16 immediate.
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs));
}
else {
// Add the base register (rs) and the immediate to get the address and store it in the arithemtic temp.
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs), SLJIT_IMM, imm);
}
switch (op.type) {
case StoreOpType::SD:
@ -1186,16 +1236,14 @@ void N64Recomp::LiveGenerator::emit_branch_condition(const ConditionalBranchOp&
}
sljit_sw src1;
sljit_sw src1w;
bool relocation_needed_src1;
sljit_sw src2;
sljit_sw src2w;
bool relocation_needed_src2;
get_operand_values(op.operands.operands[0], ctx, src1, src1w, relocation_needed_src1);
get_operand_values(op.operands.operands[1], ctx, src2, src2w, relocation_needed_src2);
get_operand_values(op.operands.operands[0], ctx, src1, src1w);
get_operand_values(op.operands.operands[1], ctx, src2, src2w);
// Relocations aren't valid on the input operands.
assert(!relocation_needed_src1 && !relocation_needed_src2);
// Relocations aren't valid on conditional branches.
assert(ctx.reloc_type == RelocType::R_MIPS_NONE);
// Create a compare jump and track it as the pending branch jump.
context->cur_branch_jump = sljit_emit_cmp(compiler, condition_type, src1, src1w, src2, src2w);

View file

@ -115,17 +115,26 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
// Move the file data into the context.
context.rom = std::move(file_data);
context.sections.resize(2);
// Create a section for the function to exist in.
context.sections.resize(1);
context.sections[0].ram_addr = text_address;
context.sections[0].rom_addr = text_offset;
context.sections[0].size = text_length;
context.sections[0].name = "test_section";
context.sections[0].name = ".text";
context.sections[0].executable = true;
context.sections[0].relocatable = true;
context.section_functions.resize(context.sections.size());
// Create a section for .data (used for relocations)
context.sections[1].ram_addr = data_address;
context.sections[1].rom_addr = init_data_offset;
context.sections[1].size = data_length;
context.sections[1].name = ".data";
context.sections[1].executable = false;
context.sections[1].relocatable = true;
size_t start_func_index;
uint32_t function_desc_address = 0;
uint32_t reloc_desc_address = 0;
// Read any extra structs.
while (next_struct_address != 0) {
@ -137,6 +146,9 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
case 1: // Function desc
function_desc_address = cur_struct_address;
break;
case 2: // Relocation
reloc_desc_address = cur_struct_address;
break;
default:
printf("Unknown struct type %u\n", struct_type);
return { TestError::UnknownStructType };
@ -198,13 +210,39 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
}
}
// Check if a relocation description exists.
if (reloc_desc_address != 0) {
uint32_t num_relocs = read_u32_swap(context.rom, reloc_desc_address + 0x08);
for (uint32_t reloc_index = 0; reloc_index < num_relocs; reloc_index++) {
uint32_t cur_desc_address = reloc_desc_address + 0x0C + reloc_index * 4 * sizeof(uint32_t);
uint32_t reloc_type = read_u32_swap(context.rom, cur_desc_address + 0x00);
uint32_t reloc_section = read_u32_swap(context.rom, cur_desc_address + 0x04);
uint32_t reloc_address = read_u32_swap(context.rom, cur_desc_address + 0x08);
uint32_t reloc_target_offset = read_u32_swap(context.rom, cur_desc_address + 0x0C);
context.sections[0].relocs.emplace_back(N64Recomp::Reloc{
.address = reloc_address,
.target_section_offset = reloc_target_offset,
.symbol_index = 0,
.target_section = static_cast<uint16_t>(reloc_section),
.type = static_cast<N64Recomp::RelocType>(reloc_type),
.reference_symbol = false
});
}
}
std::vector<std::vector<uint32_t>> dummy_static_funcs{};
std::vector<int32_t> section_addresses{};
section_addresses.emplace_back(text_address);
section_addresses.emplace_back(data_address);
auto before_codegen = std::chrono::system_clock::now();
N64Recomp::LiveGeneratorInputs generator_inputs {
.switch_error = test_switch_error,
.get_function = test_get_function,
.reference_section_addresses = nullptr,
.local_section_addresses = section_addresses.data()
};
// Create the sljit compiler and the generator.

View file

@ -54,6 +54,8 @@ namespace N64Recomp {
void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram);
void (*pause_self)(uint8_t* rdram);
void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index);
int32_t *reference_section_addresses;
int32_t *local_section_addresses;
};
class LiveGenerator final : public Generator {
public:
@ -101,6 +103,8 @@ namespace N64Recomp {
void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
// Loads the relocated address specified by the instruction context into R0.
void load_relocated_address(const InstructionContext& ctx, int reg) const;
sljit_compiler* compiler;
LiveGeneratorInputs inputs;
mutable std::unique_ptr<LiveGeneratorContext> context;

View file

@ -127,19 +127,19 @@ namespace N64Recomp {
/* TODO rename to c_seq_d when fixed in rabbitizer */
{ InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
// Loads
{ InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}, true } },
{ InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}, true } },
};
const std::unordered_map<InstrId, ConditionalBranchOp> conditional_branch_ops {