Implement relocations in live recompiler

This commit is contained in:
Mr-Wiseguy 2024-12-11 21:32:44 -05:00
parent 9fa9adbe3a
commit aa765c435f
4 changed files with 155 additions and 65 deletions

View file

@ -236,14 +236,10 @@ void get_gpr_values(int gpr, sljit_sw& out, sljit_sw& outw) {
} }
} }
void get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw, bool& needs_relocation) { void get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw) {
using namespace N64Recomp; using namespace N64Recomp;
bool relocation_valid = false; bool relocation_valid = false;
// Relocations are only valid for ImmS16 and ImmU16 operands.
assert(context.reloc_type == RelocType::R_MIPS_NONE ||
(operand == Operand::ImmS16 || operand == Operand::ImmU16));
switch (operand) { switch (operand) {
case Operand::Rd: case Operand::Rd:
get_gpr_values(context.rd, out, outw); get_gpr_values(context.rd, out, outw);
@ -312,12 +308,10 @@ void get_operand_values(N64Recomp::Operand operand, const N64Recomp::Instruction
outw = get_fpr_u64_context_offset(context.ft); outw = get_fpr_u64_context_offset(context.ft);
break; break;
case Operand::ImmU16: case Operand::ImmU16:
relocation_valid = true;
out = SLJIT_IMM; out = SLJIT_IMM;
outw = (sljit_sw)(uint16_t)context.imm16; outw = (sljit_sw)(uint16_t)context.imm16;
break; break;
case Operand::ImmS16: case Operand::ImmS16:
relocation_valid = true;
out = SLJIT_IMM; out = SLJIT_IMM;
outw = (sljit_sw)(int16_t)context.imm16; outw = (sljit_sw)(int16_t)context.imm16;
break; break;
@ -346,13 +340,6 @@ void get_operand_values(N64Recomp::Operand operand, const N64Recomp::Instruction
outw = 0; outw = 0;
break; break;
} }
if (context.reloc_type != N64Recomp::RelocType::R_MIPS_NONE) {
assert(relocation_valid && "Relocation present but not valid on the given operand!");
needs_relocation = true;
}
else {
needs_relocation = false;
}
} }
bool outputs_to_zero(N64Recomp::Operand output, const N64Recomp::InstructionContext& ctx) { bool outputs_to_zero(N64Recomp::Operand output, const N64Recomp::InstructionContext& ctx) {
@ -377,25 +364,48 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
bool failed = false; bool failed = false;
sljit_sw dst; sljit_sw dst;
sljit_sw dstw; sljit_sw dstw;
bool relocation_needed_dst;
sljit_sw src1; sljit_sw src1;
sljit_sw src1w; sljit_sw src1w;
bool relocation_needed_src1;
sljit_sw src2; sljit_sw src2;
sljit_sw src2w; sljit_sw src2w;
bool relocation_needed_src2; get_operand_values(op.output, ctx, dst, dstw);
get_operand_values(op.output, ctx, dst, dstw, relocation_needed_dst); get_operand_values(op.operands.operands[0], ctx, src1, src1w);
get_operand_values(op.operands.operands[0], ctx, src1, src1w, relocation_needed_src1); get_operand_values(op.operands.operands[1], ctx, src2, src2w);
get_operand_values(op.operands.operands[1], ctx, src2, src2w, relocation_needed_src2);
// Relocations are only valid on the second operand. // If a relocation is present, perform the relocation and change src2/src2w to use the relocated value.
assert(!relocation_needed_dst && !relocation_needed_src1); if (ctx.reloc_type != RelocType::R_MIPS_NONE) {
// Only allow LO16 relocations.
// TODO perform relocation. assert(ctx.reloc_type == RelocType::R_MIPS_LO16);
// If a relocation is needed for the second operand, perform the relocation and change src2/src2w to use the relocated value. // Only allow relocations on immediates.
if (relocation_needed_src2) { assert(src2 == SLJIT_IMM);
assert(false); // Only allow relocations on loads and adds.
} switch (op.type) {
case BinaryOpType::LD:
case BinaryOpType::LW:
case BinaryOpType::LWU:
case BinaryOpType::LH:
case BinaryOpType::LHU:
case BinaryOpType::LB:
case BinaryOpType::LBU:
case BinaryOpType::LDL:
case BinaryOpType::LDR:
case BinaryOpType::LWL:
case BinaryOpType::LWR:
case BinaryOpType::Add64:
case BinaryOpType::Add32:
break;
default:
// Relocations aren't allowed on this instruction.
assert(false);
}
// Load the relocated address into temp1.
load_relocated_address(ctx, Registers::arithmetic_temp1);
// Extract the LO16 value from the full address (sign extended lower 16 bits).
sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0);
// Replace the immediate input (src2) with the LO16 value.
src2 = Registers::arithmetic_temp1;
src2w = 0;
}
// TODO validate that the unary ops are valid for the current binary op. // TODO validate that the unary ops are valid for the current binary op.
assert(op.operands.operand_operations[0] == UnaryOpType::None || assert(op.operands.operand_operations[0] == UnaryOpType::None ||
@ -750,6 +760,20 @@ int64_t do_floor_l_d(double num) {
return (int64_t)floor(num); return (int64_t)floor(num);
} }
// Emits code that computes a relocated address and leaves it in the register `reg`.
//
// The address is the value stored in the section address table entry for the relocation's
// target section, plus the relocation's offset into that section:
//   - ctx.reloc_tag_as_reference selects inputs.reference_section_addresses over
//     inputs.local_section_addresses.
//   - ctx.reloc_section_index selects the entry within the chosen table.
//   - ctx.reloc_target_section_offset is added to the loaded section address.
//
// The table entry is read by the emitted code (an absolute-address memory operand), not at
// codegen time, so the chosen table must still be valid when the generated code runs.
void N64Recomp::LiveGenerator::load_relocated_address(const InstructionContext& ctx, int reg) const {
    // Get the pointer to the section address.
    int32_t* section_addr_ptr = (ctx.reloc_tag_as_reference ? inputs.reference_section_addresses : inputs.local_section_addresses) + ctx.reloc_section_index;

    // Load the section's address (signed 32-bit load) into the requested register.
    // Previously this always wrote to arithmetic_temp1 and silently ignored `reg`.
    sljit_emit_op1(compiler, SLJIT_MOV_S32, reg, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr));

    // Don't emit the add if the offset is zero (small optimization).
    if (ctx.reloc_target_section_offset != 0) {
        // Add the reloc section offset to the section's address and keep the result in the requested register.
        sljit_emit_op2(compiler, SLJIT_ADD, reg, 0, reg, 0, SLJIT_IMM, ctx.reloc_target_section_offset);
    }
}
void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const { void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const {
// Skip instructions that output to $zero // Skip instructions that output to $zero
if (outputs_to_zero(op.output, ctx)) { if (outputs_to_zero(op.output, ctx)) {
@ -758,20 +782,37 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
sljit_sw dst; sljit_sw dst;
sljit_sw dstw; sljit_sw dstw;
bool relocation_needed_dst;
sljit_sw src; sljit_sw src;
sljit_sw srcw; sljit_sw srcw;
bool relocation_needed_src; get_operand_values(op.output, ctx, dst, dstw);
get_operand_values(op.output, ctx, dst, dstw, relocation_needed_dst); get_operand_values(op.input, ctx, src, srcw);
get_operand_values(op.input, ctx, src, srcw, relocation_needed_src);
// Relocations aren't valid on the output operand. // If a relocation is needed for the input operand, perform the relocation and store the result directly.
assert(!relocation_needed_dst); if (ctx.reloc_type != RelocType::R_MIPS_NONE) {
// Only allow relocation of lui with an immediate.
if (op.operation != UnaryOpType::Lui || op.input != Operand::ImmU16) {
assert(false);
return;
}
// Only allow HI16 relocs.
if (ctx.reloc_type != RelocType::R_MIPS_HI16) {
assert(false);
return;
}
// Load the relocated address into temp1.
load_relocated_address(ctx, Registers::arithmetic_temp1);
// TODO perform relocation. // HI16 reloc on a lui
// If a relocation is needed for the input operand, perform the relocation and change src/srcw to use the relocated value. // The 32-bit address (a) is equal to section address + section offset
if (relocation_needed_src) { // The 16-bit immediate is equal to (a - (int16_t)a) >> 16
assert(false); // Therefore, the register should be set to (int32_t)(a - (int16_t)a) as the shifts cancel out and the lower 16 bits are zero.
// Extract a sign extended 16-bit value from the lower half of the relocated address and put it in temp2.
sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp2, 0, Registers::arithmetic_temp1, 0);
// Subtract the sign extended 16-bit value from the full address to get the HI16 value and place it in the destination.
sljit_emit_op2(compiler, SLJIT_SUB, dst, dstw, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp2, 0);
return;
} }
sljit_s32 jit_op = SLJIT_BREAKPOINT; sljit_s32 jit_op = SLJIT_BREAKPOINT;
@ -986,16 +1027,25 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const InstructionContext& ctx) const { void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const InstructionContext& ctx) const {
sljit_sw src; sljit_sw src;
sljit_sw srcw; sljit_sw srcw;
bool relocation_needed_src;
sljit_sw imm = (sljit_sw)(int16_t)ctx.imm16; sljit_sw imm = (sljit_sw)(int16_t)ctx.imm16;
get_operand_values(op.value_input, ctx, src, srcw, relocation_needed_src); get_operand_values(op.value_input, ctx, src, srcw);
// Relocations aren't valid on the input operand. // Only LO16 relocs are valid on stores.
assert(!relocation_needed_src); assert(ctx.reloc_type == RelocType::R_MIPS_NONE || ctx.reloc_type == RelocType::R_MIPS_LO16);
// Add the base register (rs) and the immediate to get the address and store it in the arithemtic temp. if (ctx.reloc_type == RelocType::R_MIPS_LO16) {
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs), SLJIT_IMM, imm); // Load the relocated address into temp1.
load_relocated_address(ctx, Registers::arithmetic_temp1);
// Extract the LO16 value from the full address (sign extended lower 16 bits).
sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0);
// Add the base register (rs) to the LO16 immediate.
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs));
}
else {
// Add the base register (rs) and the immediate to get the address and store it in the arithmetic temp.
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs), SLJIT_IMM, imm);
}
switch (op.type) { switch (op.type) {
case StoreOpType::SD: case StoreOpType::SD:
@ -1186,16 +1236,14 @@ void N64Recomp::LiveGenerator::emit_branch_condition(const ConditionalBranchOp&
} }
sljit_sw src1; sljit_sw src1;
sljit_sw src1w; sljit_sw src1w;
bool relocation_needed_src1;
sljit_sw src2; sljit_sw src2;
sljit_sw src2w; sljit_sw src2w;
bool relocation_needed_src2;
get_operand_values(op.operands.operands[0], ctx, src1, src1w, relocation_needed_src1); get_operand_values(op.operands.operands[0], ctx, src1, src1w);
get_operand_values(op.operands.operands[1], ctx, src2, src2w, relocation_needed_src2); get_operand_values(op.operands.operands[1], ctx, src2, src2w);
// Relocations aren't valid on the input operands. // Relocations aren't valid on conditional branches.
assert(!relocation_needed_src1 && !relocation_needed_src2); assert(ctx.reloc_type == RelocType::R_MIPS_NONE);
// Create a compare jump and track it as the pending branch jump. // Create a compare jump and track it as the pending branch jump.
context->cur_branch_jump = sljit_emit_cmp(compiler, condition_type, src1, src1w, src2, src2w); context->cur_branch_jump = sljit_emit_cmp(compiler, condition_type, src1, src1w, src2, src2w);

View file

@ -115,17 +115,26 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
// Move the file data into the context. // Move the file data into the context.
context.rom = std::move(file_data); context.rom = std::move(file_data);
context.sections.resize(2);
// Create a section for the function to exist in. // Create a section for the function to exist in.
context.sections.resize(1);
context.sections[0].ram_addr = text_address; context.sections[0].ram_addr = text_address;
context.sections[0].rom_addr = text_offset; context.sections[0].rom_addr = text_offset;
context.sections[0].size = text_length; context.sections[0].size = text_length;
context.sections[0].name = "test_section"; context.sections[0].name = ".text";
context.sections[0].executable = true; context.sections[0].executable = true;
context.sections[0].relocatable = true;
context.section_functions.resize(context.sections.size()); context.section_functions.resize(context.sections.size());
// Create a section for .data (used for relocations)
context.sections[1].ram_addr = data_address;
context.sections[1].rom_addr = init_data_offset;
context.sections[1].size = data_length;
context.sections[1].name = ".data";
context.sections[1].executable = false;
context.sections[1].relocatable = true;
size_t start_func_index; size_t start_func_index;
uint32_t function_desc_address = 0; uint32_t function_desc_address = 0;
uint32_t reloc_desc_address = 0;
// Read any extra structs. // Read any extra structs.
while (next_struct_address != 0) { while (next_struct_address != 0) {
@ -137,6 +146,9 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
case 1: // Function desc case 1: // Function desc
function_desc_address = cur_struct_address; function_desc_address = cur_struct_address;
break; break;
case 2: // Relocation
reloc_desc_address = cur_struct_address;
break;
default: default:
printf("Unknown struct type %u\n", struct_type); printf("Unknown struct type %u\n", struct_type);
return { TestError::UnknownStructType }; return { TestError::UnknownStructType };
@ -198,13 +210,39 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
} }
} }
// Check if a relocation description exists.
if (reloc_desc_address != 0) {
uint32_t num_relocs = read_u32_swap(context.rom, reloc_desc_address + 0x08);
for (uint32_t reloc_index = 0; reloc_index < num_relocs; reloc_index++) {
uint32_t cur_desc_address = reloc_desc_address + 0x0C + reloc_index * 4 * sizeof(uint32_t);
uint32_t reloc_type = read_u32_swap(context.rom, cur_desc_address + 0x00);
uint32_t reloc_section = read_u32_swap(context.rom, cur_desc_address + 0x04);
uint32_t reloc_address = read_u32_swap(context.rom, cur_desc_address + 0x08);
uint32_t reloc_target_offset = read_u32_swap(context.rom, cur_desc_address + 0x0C);
context.sections[0].relocs.emplace_back(N64Recomp::Reloc{
.address = reloc_address,
.target_section_offset = reloc_target_offset,
.symbol_index = 0,
.target_section = static_cast<uint16_t>(reloc_section),
.type = static_cast<N64Recomp::RelocType>(reloc_type),
.reference_symbol = false
});
}
}
std::vector<std::vector<uint32_t>> dummy_static_funcs{}; std::vector<std::vector<uint32_t>> dummy_static_funcs{};
std::vector<int32_t> section_addresses{};
section_addresses.emplace_back(text_address);
section_addresses.emplace_back(data_address);
auto before_codegen = std::chrono::system_clock::now(); auto before_codegen = std::chrono::system_clock::now();
N64Recomp::LiveGeneratorInputs generator_inputs { N64Recomp::LiveGeneratorInputs generator_inputs {
.switch_error = test_switch_error, .switch_error = test_switch_error,
.get_function = test_get_function, .get_function = test_get_function,
.reference_section_addresses = nullptr,
.local_section_addresses = section_addresses.data()
}; };
// Create the sljit compiler and the generator. // Create the sljit compiler and the generator.

View file

@ -54,6 +54,8 @@ namespace N64Recomp {
void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram); void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram);
void (*pause_self)(uint8_t* rdram); void (*pause_self)(uint8_t* rdram);
void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index); void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index);
int32_t *reference_section_addresses;
int32_t *local_section_addresses;
}; };
class LiveGenerator final : public Generator { class LiveGenerator final : public Generator {
public: public:
@ -101,6 +103,8 @@ namespace N64Recomp {
void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const; void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const; void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const; void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
// Loads the relocated address specified by the instruction context into the register given by `reg`.
void load_relocated_address(const InstructionContext& ctx, int reg) const;
sljit_compiler* compiler; sljit_compiler* compiler;
LiveGeneratorInputs inputs; LiveGeneratorInputs inputs;
mutable std::unique_ptr<LiveGeneratorContext> context; mutable std::unique_ptr<LiveGeneratorContext> context;

View file

@ -127,19 +127,19 @@ namespace N64Recomp {
/* TODO rename to c_seq_d when fixed in rabbitizer */ /* TODO rename to c_seq_d when fixed in rabbitizer */
{ InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } }, { InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
// Loads // Loads
{ InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, { InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}, true } }, { InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}, true } },
}; };
const std::unordered_map<InstrId, ConditionalBranchOp> conditional_branch_ops { const std::unordered_map<InstrId, ConditionalBranchOp> conditional_branch_ops {