diff --git a/LiveRecomp/live_generator.cpp b/LiveRecomp/live_generator.cpp index ad1c336..c75b0b2 100644 --- a/LiveRecomp/live_generator.cpp +++ b/LiveRecomp/live_generator.cpp @@ -236,14 +236,10 @@ void get_gpr_values(int gpr, sljit_sw& out, sljit_sw& outw) { } } -void get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw, bool& needs_relocation) { +void get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw) { using namespace N64Recomp; bool relocation_valid = false; - // Relocations are only valid for ImmS16 and ImmU16 operands. - assert(context.reloc_type == RelocType::R_MIPS_NONE || - (operand == Operand::ImmS16 || operand == Operand::ImmU16)); - switch (operand) { case Operand::Rd: get_gpr_values(context.rd, out, outw); @@ -312,12 +308,10 @@ void get_operand_values(N64Recomp::Operand operand, const N64Recomp::Instruction outw = get_fpr_u64_context_offset(context.ft); break; case Operand::ImmU16: - relocation_valid = true; out = SLJIT_IMM; outw = (sljit_sw)(uint16_t)context.imm16; break; case Operand::ImmS16: - relocation_valid = true; out = SLJIT_IMM; outw = (sljit_sw)(int16_t)context.imm16; break; @@ -346,13 +340,6 @@ void get_operand_values(N64Recomp::Operand operand, const N64Recomp::Instruction outw = 0; break; } - if (context.reloc_type != N64Recomp::RelocType::R_MIPS_NONE) { - assert(relocation_valid && "Relocation present but not valid on the given operand!"); - needs_relocation = true; - } - else { - needs_relocation = false; - } } bool outputs_to_zero(N64Recomp::Operand output, const N64Recomp::InstructionContext& ctx) { @@ -377,25 +364,48 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr bool failed = false; sljit_sw dst; sljit_sw dstw; - bool relocation_needed_dst; sljit_sw src1; sljit_sw src1w; - bool relocation_needed_src1; sljit_sw src2; sljit_sw src2w; - bool 
relocation_needed_src2; - get_operand_values(op.output, ctx, dst, dstw, relocation_needed_dst); - get_operand_values(op.operands.operands[0], ctx, src1, src1w, relocation_needed_src1); - get_operand_values(op.operands.operands[1], ctx, src2, src2w, relocation_needed_src2); + get_operand_values(op.output, ctx, dst, dstw); + get_operand_values(op.operands.operands[0], ctx, src1, src1w); + get_operand_values(op.operands.operands[1], ctx, src2, src2w); - // Relocations are only valid on the second operand. - assert(!relocation_needed_dst && !relocation_needed_src1); - - // TODO perform relocation. - // If a relocation is needed for the second operand, perform the relocation and change src2/src2w to use the relocated value. - if (relocation_needed_src2) { - assert(false); - } + // If a relocation is present, perform the relocation and change src2/src2w to use the relocated value. + if (ctx.reloc_type != RelocType::R_MIPS_NONE) { + // Only allow LO16 relocations. + assert(ctx.reloc_type == RelocType::R_MIPS_LO16); + // Only allow relocations on immediates. + assert(src2 == SLJIT_IMM); + // Only allow relocations on loads and adds. + switch (op.type) { + case BinaryOpType::LD: + case BinaryOpType::LW: + case BinaryOpType::LWU: + case BinaryOpType::LH: + case BinaryOpType::LHU: + case BinaryOpType::LB: + case BinaryOpType::LBU: + case BinaryOpType::LDL: + case BinaryOpType::LDR: + case BinaryOpType::LWL: + case BinaryOpType::LWR: + case BinaryOpType::Add64: + case BinaryOpType::Add32: + break; + default: + // Relocations aren't allowed on this instruction. + assert(false); + } + // Load the relocated address into temp1. + load_relocated_address(ctx, Registers::arithmetic_temp1); + // Extract the LO16 value from the full address (sign extended lower 16 bits). + sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0); + // Replace the immediate input (src2) with the LO16 value. 
+ src2 = Registers::arithmetic_temp1; + src2w = 0; + } // TODO validate that the unary ops are valid for the current binary op. assert(op.operands.operand_operations[0] == UnaryOpType::None || @@ -750,6 +760,20 @@ int64_t do_floor_l_d(double num) { return (int64_t)floor(num); } +void N64Recomp::LiveGenerator::load_relocated_address(const InstructionContext& ctx, int reg) const { + // Get the pointer to the section address. + int32_t* section_addr_ptr = (ctx.reloc_tag_as_reference ? inputs.reference_section_addresses : inputs.local_section_addresses) + ctx.reloc_section_index; + + // Load the section's address into arithmetic_temp1. NOTE(review): the reg parameter is not used here; confirm whether it was meant to be the destination register. + sljit_emit_op1(compiler, SLJIT_MOV_S32, Registers::arithmetic_temp1, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr)); + + // Don't emit the add if the offset is zero (small optimization). + if (ctx.reloc_target_section_offset != 0) { + // Add the reloc section offset to the section's address and put the result in arithmetic_temp1. + sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_IMM, ctx.reloc_target_section_offset); + } +} + void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const { // Skip instructions that output to $zero if (outputs_to_zero(op.output, ctx)) { @@ -758,20 +782,37 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc sljit_sw dst; sljit_sw dstw; - bool relocation_needed_dst; sljit_sw src; sljit_sw srcw; - bool relocation_needed_src; - get_operand_values(op.output, ctx, dst, dstw, relocation_needed_dst); - get_operand_values(op.input, ctx, src, srcw, relocation_needed_src); + get_operand_values(op.output, ctx, dst, dstw); + get_operand_values(op.input, ctx, src, srcw); - // Relocations aren't valid on the output operand. - assert(!relocation_needed_dst); + // If a relocation is needed for the input operand, perform the relocation and store the result directly. 
+ if (ctx.reloc_type != RelocType::R_MIPS_NONE) { + // Only allow relocation of lui with an immediate. + if (op.operation != UnaryOpType::Lui || op.input != Operand::ImmU16) { + assert(false); + return; + } + // Only allow HI16 relocs. + if (ctx.reloc_type != RelocType::R_MIPS_HI16) { + assert(false); + return; + } + // Load the relocated address into temp1. + load_relocated_address(ctx, Registers::arithmetic_temp1); - // TODO perform relocation. - // If a relocation is needed for the input operand, perform the relocation and change src/srcw to use the relocated value. - if (relocation_needed_src) { - assert(false); + // HI16 reloc on a lui + // The 32-bit address (a) is equal to section address + section offset + // The 16-bit immediate is equal to (a - (int16_t)a) >> 16 + // Therefore, the register should be set to (int32_t)(a - (int16_t)a) as the shifts cancel out and the lower 16 bits are zero. + + // Extract a sign extended 16-bit value from the lower half of the relocated address and put it in temp2. + sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp2, 0, Registers::arithmetic_temp1, 0); + + // Subtract the sign extended 16-bit value from the full address to get the HI16 value and place it in the destination. + sljit_emit_op2(compiler, SLJIT_SUB, dst, dstw, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp2, 0); + return; } sljit_s32 jit_op = SLJIT_BREAKPOINT; @@ -986,16 +1027,25 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const InstructionContext& ctx) const { sljit_sw src; sljit_sw srcw; - bool relocation_needed_src; sljit_sw imm = (sljit_sw)(int16_t)ctx.imm16; - get_operand_values(op.value_input, ctx, src, srcw, relocation_needed_src); + get_operand_values(op.value_input, ctx, src, srcw); - // Relocations aren't valid on the input operand. - assert(!relocation_needed_src); + // Only LO16 relocs are valid on stores. 
+ assert(ctx.reloc_type == RelocType::R_MIPS_NONE || ctx.reloc_type == RelocType::R_MIPS_LO16); - // Add the base register (rs) and the immediate to get the address and store it in the arithemtic temp. - sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs), SLJIT_IMM, imm); + if (ctx.reloc_type == RelocType::R_MIPS_LO16) { + // Load the relocated address into temp1. + load_relocated_address(ctx, Registers::arithmetic_temp1); + // Extract the LO16 value from the full address (sign extended lower 16 bits). + sljit_emit_op1(compiler, SLJIT_MOV_S16, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0); + // Add the base register (rs) to the LO16 immediate. + sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs)); + } + else { + // Add the base register (rs) and the immediate to get the address and store it in the arithmetic temp. + sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(ctx.rs), SLJIT_IMM, imm); + } switch (op.type) { case StoreOpType::SD: @@ -1186,16 +1236,14 @@ void N64Recomp::LiveGenerator::emit_branch_condition(const ConditionalBranchOp& } sljit_sw src1; sljit_sw src1w; - bool relocation_needed_src1; sljit_sw src2; sljit_sw src2w; - bool relocation_needed_src2; - get_operand_values(op.operands.operands[0], ctx, src1, src1w, relocation_needed_src1); - get_operand_values(op.operands.operands[1], ctx, src2, src2w, relocation_needed_src2); + get_operand_values(op.operands.operands[0], ctx, src1, src1w); + get_operand_values(op.operands.operands[1], ctx, src2, src2w); - // Relocations aren't valid on the input operands. - assert(!relocation_needed_src1 && !relocation_needed_src2); + // Relocations aren't valid on conditional branches. 
+ assert(ctx.reloc_type == RelocType::R_MIPS_NONE); // Create a compare jump and track it as the pending branch jump. context->cur_branch_jump = sljit_emit_cmp(compiler, condition_type, src1, src1w, src2, src2w); diff --git a/LiveRecomp/live_recompiler_test.cpp b/LiveRecomp/live_recompiler_test.cpp index 9f9a488..c5673eb 100644 --- a/LiveRecomp/live_recompiler_test.cpp +++ b/LiveRecomp/live_recompiler_test.cpp @@ -115,17 +115,26 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te // Move the file data into the context. context.rom = std::move(file_data); + context.sections.resize(2); // Create a section for the function to exist in. - context.sections.resize(1); context.sections[0].ram_addr = text_address; context.sections[0].rom_addr = text_offset; context.sections[0].size = text_length; - context.sections[0].name = "test_section"; + context.sections[0].name = ".text"; context.sections[0].executable = true; + context.sections[0].relocatable = true; context.section_functions.resize(context.sections.size()); + // Create a section for .data (used for relocations) + context.sections[1].ram_addr = data_address; + context.sections[1].rom_addr = init_data_offset; + context.sections[1].size = data_length; + context.sections[1].name = ".data"; + context.sections[1].executable = false; + context.sections[1].relocatable = true; size_t start_func_index; uint32_t function_desc_address = 0; + uint32_t reloc_desc_address = 0; // Read any extra structs. 
while (next_struct_address != 0) { @@ -137,6 +146,9 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te case 1: // Function desc function_desc_address = cur_struct_address; break; + case 2: // Relocation + reloc_desc_address = cur_struct_address; + break; default: printf("Unknown struct type %u\n", struct_type); return { TestError::UnknownStructType }; @@ -198,13 +210,39 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te } } + // Check if a relocation description exists. + if (reloc_desc_address != 0) { + uint32_t num_relocs = read_u32_swap(context.rom, reloc_desc_address + 0x08); + for (uint32_t reloc_index = 0; reloc_index < num_relocs; reloc_index++) { + uint32_t cur_desc_address = reloc_desc_address + 0x0C + reloc_index * 4 * sizeof(uint32_t); + uint32_t reloc_type = read_u32_swap(context.rom, cur_desc_address + 0x00); + uint32_t reloc_section = read_u32_swap(context.rom, cur_desc_address + 0x04); + uint32_t reloc_address = read_u32_swap(context.rom, cur_desc_address + 0x08); + uint32_t reloc_target_offset = read_u32_swap(context.rom, cur_desc_address + 0x0C); + + context.sections[0].relocs.emplace_back(N64Recomp::Reloc{ + .address = reloc_address, + .target_section_offset = reloc_target_offset, + .symbol_index = 0, + .target_section = static_cast(reloc_section), + .type = static_cast(reloc_type), + .reference_symbol = false + }); + } + } + std::vector> dummy_static_funcs{}; + std::vector section_addresses{}; + section_addresses.emplace_back(text_address); + section_addresses.emplace_back(data_address); auto before_codegen = std::chrono::system_clock::now(); N64Recomp::LiveGeneratorInputs generator_inputs { .switch_error = test_switch_error, .get_function = test_get_function, + .reference_section_addresses = nullptr, + .local_section_addresses = section_addresses.data() }; // Create the sljit compiler and the generator. 
diff --git a/include/recompiler/live_recompiler.h b/include/recompiler/live_recompiler.h index 76fa812..8b71eb1 100644 --- a/include/recompiler/live_recompiler.h +++ b/include/recompiler/live_recompiler.h @@ -54,6 +54,8 @@ namespace N64Recomp { void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram); void (*pause_self)(uint8_t* rdram); void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index); + int32_t *reference_section_addresses; + int32_t *local_section_addresses; }; class LiveGenerator final : public Generator { public: @@ -101,6 +103,8 @@ namespace N64Recomp { void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const; void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const; void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const; + // Loads the relocated address specified by the instruction context into R0. 
+ void load_relocated_address(const InstructionContext& ctx, int reg) const; sljit_compiler* compiler; LiveGeneratorInputs inputs; mutable std::unique_ptr context; diff --git a/src/operations.cpp b/src/operations.cpp index ba570e6..bb2b325 100644 --- a/src/operations.cpp +++ b/src/operations.cpp @@ -127,19 +127,19 @@ namespace N64Recomp { /* TODO rename to c_seq_d when fixed in rabbitizer */ { InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } }, // Loads - { InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lwr, { BinaryOpType::LWR, 
Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} }, - { InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}, true } }, + { InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, 
UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} }, + { InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}, true } }, }; const std::unordered_map conditional_branch_ops {