diff --git a/LiveRecomp/live_generator.cpp b/LiveRecomp/live_generator.cpp
index d3a5f48..0f7bea4 100644
--- a/LiveRecomp/live_generator.cpp
+++ b/LiveRecomp/live_generator.cpp
@@ -11,6 +11,8 @@
 
 #include "sljitLir.h"
 
+static_assert(sizeof(void*) >= sizeof(sljit_uw), "`void*` must be able to hold a `sljit_uw` value for rewritable jumps!");
+
 constexpr uint64_t rdram_offset = 0xFFFFFFFF80000000ULL;
 
 void N64Recomp::live_recompiler_init() {
@@ -31,7 +33,6 @@ namespace Registers {
     constexpr int arithmetic_temp2 = SLJIT_R1;
     constexpr int arithmetic_temp3 = SLJIT_R2;
    constexpr int arithmetic_temp4 = SLJIT_R3;
-    constexpr int float_temp = SLJIT_FR0;
 }
 
 struct InnerCall {
@@ -40,7 +41,8 @@ struct InnerCall {
 };
 
 struct ReferenceSymbolCall {
-    uint16_t reference;
+    N64Recomp::SymbolReference reference;
+    sljit_jump* jump;
 };
 
 struct SwitchErrorJump {
@@ -56,8 +58,14 @@ struct N64Recomp::LiveGeneratorContext {
     std::vector<sljit_label*> func_labels;
     std::vector<InnerCall> inner_calls;
     std::vector<std::vector<std::string>> switch_jump_labels;
-    // See LiveGeneratorOutput::jump_tables for info.
-    std::vector<void**> jump_tables;
+    // See LiveGeneratorOutput::jump_tables for info. Contains sljit labels so they can be linked after recompilation.
+    std::vector<std::pair<std::vector<sljit_label*>, std::unique_ptr<void*[]>>> unlinked_jump_tables;
+    // Jump tables for the current function being recompiled.
+    std::vector<std::unique_ptr<void*[]>> pending_jump_tables;
+    // See LiveGeneratorOutput::reference_symbol_jumps for info.
+    std::vector<std::pair<ReferenceJumpDetails, sljit_jump*>> reference_symbol_jumps;
+    // See LiveGeneratorOutput::import_jumps_by_index for info.
+    std::unordered_multimap<size_t, sljit_jump*> import_jumps_by_index;
     std::vector<SwitchErrorJump> switch_error_jumps;
     sljit_jump* cur_branch_jump;
 };
@@ -78,6 +86,11 @@ N64Recomp::LiveGenerator::~LiveGenerator() {
 N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
     LiveGeneratorOutput ret{};
 
+    if (errored) {
+        ret.good = false;
+        return ret;
+    }
+
     ret.good = true;
 
     // Populate all the pending inner function calls.
@@ -147,34 +160,37 @@ N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
             ret.functions[func_index] = reinterpret_cast<recomp_func_t*>(sljit_get_label_addr(func_label));
         }
     }
+    context->func_labels.clear();
 
-    // Populate all the switch case addresses.
-    bool invalid_switch = false;
-    for (size_t switch_index = 0; switch_index < context->switch_jump_labels.size(); switch_index++) {
-        const std::vector<std::string>& cur_labels = context->switch_jump_labels[switch_index];
-        void** cur_jump_table = context->jump_tables[switch_index];
-        for (size_t case_index = 0; case_index < cur_labels.size(); case_index++) {
-            // Find the label.
-            auto find_it = context->labels.find(cur_labels[case_index]);
-            if (find_it == context->labels.end()) {
-                // Label not found, invalid switch.
-                // Don't return immediately, as we need to ensure that all the jump tables end up in ret
-                // so that it cleans them up in its destructor.
-                invalid_switch = true;
-                break;
-            }
+    // Get the reference symbol jump instruction addresses.
+    ret.reference_symbol_jumps.resize(context->reference_symbol_jumps.size());
+    for (size_t jump_index = 0; jump_index < context->reference_symbol_jumps.size(); jump_index++) {
+        ReferenceJumpDetails& details = context->reference_symbol_jumps[jump_index].first;
+        sljit_jump* jump = context->reference_symbol_jumps[jump_index].second;
 
-            // Get the label's address and place it in the jump table.
-            cur_jump_table[case_index] = reinterpret_cast<void*>(sljit_get_label_addr(find_it->second));
+        ret.reference_symbol_jumps[jump_index].first = details;
+        ret.reference_symbol_jumps[jump_index].second = reinterpret_cast<void*>(jump->addr);
+    }
+    context->reference_symbol_jumps.clear();
+
+    // Get the import jump instruction addresses.
+    ret.import_jumps_by_index.reserve(context->import_jumps_by_index.size());
+    for (auto& [jump_index, jump] : context->import_jumps_by_index) {
+        ret.import_jumps_by_index.emplace(jump_index, reinterpret_cast<void*>(jump->addr));
+    }
+    context->import_jumps_by_index.clear();
+
+    // Populate label addresses for the jump tables and place them in the output.
+    for (auto& [labels, jump_table] : context->unlinked_jump_tables) {
+        for (size_t entry_index = 0; entry_index < labels.size(); entry_index++) {
+            sljit_label* cur_label = labels[entry_index];
+            jump_table[entry_index] = reinterpret_cast<void*>(sljit_get_label_addr(cur_label));
         }
-        ret.jump_tables.emplace_back(cur_jump_table);
+        ret.jump_tables.emplace_back(std::move(jump_table));
     }
-    context->switch_jump_labels.clear();
-    context->jump_tables.clear();
+    context->unlinked_jump_tables.clear();
 
-    if (invalid_switch) {
-        return { };
-    }
+    ret.executable_offset = sljit_get_executable_offset(compiler);
 
     sljit_free_compiler(compiler);
     compiler = nullptr;
@@ -188,16 +204,26 @@ N64Recomp::LiveGeneratorOutput::~LiveGeneratorOutput() {
         sljit_free_code(code, nullptr);
         code = nullptr;
     }
-
-    for (const char* literal : string_literals) {
-        delete[] literal;
-    }
-    string_literals.clear();
+}
 
-    for (void** jump_table : jump_tables) {
-        delete[] jump_table;
+size_t N64Recomp::LiveGeneratorOutput::num_reference_symbol_jumps() const {
+    return reference_symbol_jumps.size();
+}
+
+void N64Recomp::LiveGeneratorOutput::set_reference_symbol_jump(size_t jump_index, recomp_func_t* func) {
+    const auto& jump_entry = reference_symbol_jumps[jump_index];
+    sljit_set_jump_addr(reinterpret_cast<sljit_uw>(jump_entry.second), reinterpret_cast<sljit_uw>(func), executable_offset);
+}
+
+N64Recomp::ReferenceJumpDetails N64Recomp::LiveGeneratorOutput::get_reference_symbol_jump_details(size_t jump_index) {
+    return reference_symbol_jumps[jump_index].first;
+}
+
+void N64Recomp::LiveGeneratorOutput::populate_import_symbol_jumps(size_t import_index, recomp_func_t* func) {
+    auto find_range = import_jumps_by_index.equal_range(import_index);
+    for (auto it = find_range.first; it != find_range.second; ++it) {
+        sljit_set_jump_addr(reinterpret_cast<sljit_uw>(it->second), reinterpret_cast<sljit_uw>(func), executable_offset);
     }
-    jump_tables.clear();
 }
 
 constexpr int get_gpr_context_offset(int gpr_index) {
@@ -241,7 +267,6 @@ void get_gpr_values(int gpr, sljit_sw& out, sljit_sw& outw) {
 bool get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw) {
     using namespace N64Recomp;
 
-    bool relocation_valid = false;
     switch (operand) {
         case Operand::Rd:
@@ -438,6 +463,8 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
     }
 
     if (op.operands.operand_operations[1] != UnaryOpType::None &&
+        op.operands.operand_operations[1] != UnaryOpType::ToU64 &&
+        op.operands.operand_operations[1] != UnaryOpType::ToS64 &&
         op.operands.operand_operations[1] != UnaryOpType::Mask5 && // Only for 32-bit shifts
         op.operands.operand_operations[1] != UnaryOpType::Mask6) // Only for 64-bit shifts
     {
@@ -455,7 +482,7 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
         sljit_emit_op1(this->compiler, SLJIT_MOV_P, dst, dstw,
             Registers::arithmetic_temp1, 0);
     };
 
-    auto do_op32 = [dst, dstw, src1, src1w, src2, src2w, this, &sign_extend_and_store](sljit_s32 op) {
+    auto do_op32 = [src1, src1w, src2, src2w, this, &sign_extend_and_store](sljit_s32 op) {
         sljit_emit_op2(this->compiler, op, Registers::arithmetic_temp1, 0, src1, src1w, src2, src2w);
         sign_extend_and_store();
     };
@@ -468,7 +495,7 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
         sljit_emit_fop2(this->compiler, op, dst, dstw, src1, src1w, src2, src2w);
     };
 
-    auto do_load_op = [dst, dstw, src1, src1w, src2, src2w, &ctx, this](sljit_s32 op, int address_xor) {
+    auto do_load_op = [dst, dstw, src1, src1w, src2, src2w, this](sljit_s32 op, int address_xor) {
         // TODO 0 immediate optimization.
 
-        // Add the base and immediate into the arithemtic temp.
+        // Add the base and immediate into the arithmetic temp.
@@ -486,7 +513,7 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
         sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, Registers::arithmetic_temp1, 0);
     };
 
-    auto do_compare_op = [cmp_unsigned, dst, dstw, src1, src1w, src2, src2w, &ctx, this](sljit_s32 op_unsigned, sljit_s32 op_signed) {
+    auto do_compare_op = [cmp_unsigned, dst, dstw, src1, src1w, src2, src2w, this](sljit_s32 op_unsigned, sljit_s32 op_signed) {
         // Pick the operation based on the signedness of the comparison.
         sljit_s32 op = cmp_unsigned ? op_unsigned : op_signed;
 
@@ -506,6 +533,18 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
         sljit_emit_op_flags(compiler, SLJIT_MOV, dst, dstw, op);
     };
 
+    auto do_float_compare_op = [dst, dstw, src1, src1w, src2, src2w, this](sljit_s32 flag_op, sljit_s32 set_op, bool double_precision) {
+        // Pick the comparison operation based on the precision of the operands.
+        sljit_s32 compare_op = set_op | (double_precision ? SLJIT_CMP_F64 : SLJIT_CMP_F32);
+
+        // Perform the comparison with the determined operation.
+        // Float comparisons use fop1 and put the left hand side in dst.
+        sljit_emit_fop1(compiler, compare_op, src1, src1w, src2, src2w);
+
+        // Move the operation's flag into the destination.
+        sljit_emit_op_flags(compiler, SLJIT_MOV, dst, dstw, flag_op);
+    };
+
     auto do_unaligned_load_op = [dst, dstw, src1, src1w, src2, src2w, this](bool left, bool doubleword) {
         // TODO 0 immediate optimization.
 
@@ -691,6 +730,24 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
         case BinaryOpType::GreaterEq:
             do_compare_op(SLJIT_GREATER_EQUAL, SLJIT_SIG_GREATER_EQUAL);
             break;
+        case BinaryOpType::EqualF32:
+            do_float_compare_op(SLJIT_F_EQUAL, SLJIT_SET_F_EQUAL, false);
+            break;
+        case BinaryOpType::LessF32:
+            do_float_compare_op(SLJIT_F_LESS, SLJIT_SET_F_LESS, false);
+            break;
+        case BinaryOpType::LessEqF32:
+            do_float_compare_op(SLJIT_F_LESS_EQUAL, SLJIT_SET_F_LESS_EQUAL, false);
+            break;
+        case BinaryOpType::EqualF64:
+            do_float_compare_op(SLJIT_F_EQUAL, SLJIT_SET_F_EQUAL, true);
+            break;
+        case BinaryOpType::LessF64:
+            do_float_compare_op(SLJIT_F_LESS, SLJIT_SET_F_LESS, true);
+            break;
+        case BinaryOpType::LessEqF64:
+            do_float_compare_op(SLJIT_F_LESS_EQUAL, SLJIT_SET_F_LESS_EQUAL, true);
+            break;
 
         // Loads
         case BinaryOpType::LD:
@@ -792,13 +849,13 @@ void N64Recomp::LiveGenerator::load_relocated_address(const InstructionContext&
     // Get the pointer to the section address.
     int32_t* section_addr_ptr = (ctx.reloc_tag_as_reference ? inputs.reference_section_addresses : inputs.local_section_addresses) + ctx.reloc_section_index;
 
-    // Load the section's address into R0.
-    sljit_emit_op1(compiler, SLJIT_MOV_S32, Registers::arithmetic_temp1, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr));
+    // Load the section's address into the target register.
+    sljit_emit_op1(compiler, SLJIT_MOV_S32, reg, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr));
 
     // Don't emit the add if the offset is zero (small optimization).
     if (ctx.reloc_target_section_offset != 0) {
-        // Add the reloc section offset to the section's address and put the result in R0.
-        sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_IMM, ctx.reloc_target_section_offset);
+        // Add the reloc section offset to the section's address and put the result in the target register.
+        sljit_emit_op2(compiler, SLJIT_ADD, reg, 0, reg, 0, SLJIT_IMM, ctx.reloc_target_section_offset);
     }
 }
 
@@ -853,7 +910,6 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
 
     sljit_s32 jit_op = SLJIT_BREAKPOINT;
 
-    bool failed = false;
     bool float_op = false;
     bool func_float_op = false;
 
@@ -1200,12 +1256,52 @@ void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const Instruc
 void N64Recomp::LiveGenerator::emit_function_start(const std::string& function_name, size_t func_index) const {
     context->function_name = function_name;
     context->func_labels[func_index] = sljit_emit_label(compiler);
+    // sljit_emit_op0(compiler, SLJIT_BREAKPOINT);
     sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_FLOAT(1), 5 | SLJIT_ENTER_FLOAT(0), 0);
     sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
 }
 
 void N64Recomp::LiveGenerator::emit_function_end() const {
-    // Nothing to do here.
+    // Check that all jumps have been paired to a label.
+    if (!context->pending_jumps.empty()) {
+        assert(false);
+        errored = true;
+    }
+
+    // Populate the labels for pending switches and move them into the unlinked jump tables.
+    bool invalid_switch = false;
+    for (size_t switch_index = 0; switch_index < context->switch_jump_labels.size(); switch_index++) {
+        const std::vector<std::string>& cur_labels = context->switch_jump_labels[switch_index];
+        std::vector<sljit_label*> cur_label_addrs{};
+        cur_label_addrs.resize(cur_labels.size());
+        for (size_t case_index = 0; case_index < cur_labels.size(); case_index++) {
+            // Find the label.
+            auto find_it = context->labels.find(cur_labels[case_index]);
+            if (find_it == context->labels.end()) {
+                // Label not found, invalid switch.
+                // Track this in a variable instead of returning immediately so that the pending labels are still cleared.
+                invalid_switch = true;
+                break;
+            }
+            cur_label_addrs[case_index] = find_it->second;
+        }
+        context->unlinked_jump_tables.emplace_back(
+            std::make_pair<std::vector<sljit_label*>, std::unique_ptr<void*[]>>(
+                std::move(cur_label_addrs),
+                std::move(context->pending_jump_tables[switch_index])
+            )
+        );
+    }
+    context->switch_jump_labels.clear();
+    context->pending_jump_tables.clear();
+
+    // Clear the labels to prevent labels from one function being jumped to by another.
+    context->labels.clear();
+
+    if (invalid_switch) {
+        assert(false);
+        errored = true;
+    }
 }
 
 void N64Recomp::LiveGenerator::emit_function_call_lookup(uint32_t addr) const {
@@ -1244,13 +1340,33 @@ void N64Recomp::LiveGenerator::emit_function_call_by_register(int reg) const {
     sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P, P), SLJIT_R2, 0);
 }
 
-void N64Recomp::LiveGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const {
-    const N64Recomp::ReferenceSymbol& sym = context.get_reference_symbol(section_index, symbol_index);
-    assert(false);
-    errored = true;
+void N64Recomp::LiveGenerator::emit_function_call_reference_symbol(const Context&, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const {
+    (void)symbol_index;
+
+    // Load rdram and ctx into R0 and R1.
+    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
+    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
+    // sljit_emit_op0(compiler, SLJIT_BREAKPOINT);
+    // Call the function and save the jump to set its target later on.
+    sljit_jump* call_jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP, SLJIT_ARGS2V(P, P));
+    // Set a dummy jump target; this will get replaced during reference/import symbol jump population.
+    if (section_index == N64Recomp::SectionImport) {
+        sljit_set_target(call_jump, sljit_uw(-1));
+        context->import_jumps_by_index.emplace(symbol_index, call_jump);
+    }
+    else {
+        sljit_set_target(call_jump, sljit_uw(-2));
+        context->reference_symbol_jumps.emplace_back(std::make_pair(
+            ReferenceJumpDetails{
+                .section = section_index,
+                .section_offset = target_section_offset
+            },
+            call_jump
+        ));
+    }
 }
 
-void N64Recomp::LiveGenerator::emit_function_call(const Context& recompiler_context, size_t function_index) const {
+void N64Recomp::LiveGenerator::emit_function_call(const Context&, size_t function_index) const {
     // Load rdram and ctx into R0 and R1.
     sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
@@ -1290,6 +1406,8 @@ void N64Recomp::LiveGenerator::emit_label(const std::string& label_name) const {
 }
 
 void N64Recomp::LiveGenerator::emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const {
+    (void)jtbl;
+    (void)reg;
     // Nothing to do here, the live recompiler performs a subtraction to get the switch's case.
 }
 
@@ -1403,9 +1521,8 @@ void N64Recomp::LiveGenerator::emit_switch(const JumpTable& jtbl, int reg) const
     }
     context->switch_jump_labels.emplace_back(std::move(cur_labels));
 
-    // Allocate the jump table. Must be manually allocated to prevent the address from changing.
-    void** cur_jump_table = new void*[jtbl.entries.size()];
-    context->jump_tables.emplace_back(cur_jump_table);
+    // Allocate the jump table.
+    std::unique_ptr<void*[]> cur_jump_table = std::make_unique<void*[]>(jtbl.entries.size());
 
     /// Codegen
 
@@ -1423,18 +1540,25 @@ void N64Recomp::LiveGenerator::emit_switch(const JumpTable& jtbl, int reg) const
     // Multiply the jump table addend by 2 to get the addend for the real jump table. (4 bytes per entry to 8 bytes per entry).
     sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0);
 
     // Load the real jump table address.
-    sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp2, 0, SLJIT_IMM, (sljit_sw)cur_jump_table);
+    sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp2, 0, SLJIT_IMM, (sljit_sw)cur_jump_table.get());
     // Load the real jump entry.
     sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp1, 0, SLJIT_MEM2(Registers::arithmetic_temp1, Registers::arithmetic_temp2), 0);
     // Jump to the loaded entry.
     sljit_emit_ijump(compiler, SLJIT_JUMP, Registers::arithmetic_temp1, 0);
+
+    // Move the jump table into the pending jump tables.
+    context->pending_jump_tables.emplace_back(std::move(cur_jump_table));
 }
 
 void N64Recomp::LiveGenerator::emit_case(int case_index, const std::string& target_label) const {
+    (void)case_index;
+    (void)target_label;
     // Nothing to do here, the jump table is built in emit_switch.
 }
 
 void N64Recomp::LiveGenerator::emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const {
+    (void)instr_vram;
+    (void)jtbl_vram;
     // Nothing to do here, the jump table is built in emit_switch.
 }
 
@@ -1447,10 +1571,13 @@ void N64Recomp::LiveGenerator::emit_return() const {
 }
 
 void N64Recomp::LiveGenerator::emit_check_fr(int fpr) const {
+    (void)fpr;
     // Nothing to do here.
 }
 
 void N64Recomp::LiveGenerator::emit_check_nan(int fpr, bool is_double) const {
+    (void)fpr;
+    (void)is_double;
     // Nothing to do here.
 }
 
@@ -1704,6 +1831,7 @@ void N64Recomp::LiveGenerator::emit_trigger_event(uint32_t event_index) const {
 }
 
 void N64Recomp::LiveGenerator::emit_comment(const std::string& comment) const {
+    (void)comment;
     // Nothing to do here.
 }
 
diff --git a/include/recompiler/context.h b/include/recompiler/context.h
index e355a6a..a98a9b5 100644
--- a/include/recompiler/context.h
+++ b/include/recompiler/context.h
@@ -188,6 +188,8 @@ namespace N64Recomp {
         std::vector<ReferenceSymbol> reference_symbols;
         // Mapping of symbol name to reference symbol index.
         std::unordered_map<std::string, size_t> reference_symbols_by_name;
+        // Whether all reference sections should be treated as relocatable (used in live recompilation).
+        bool all_reference_sections_relocatable = false;
     public:
         std::vector<Section> sections;
         std::vector<Function> functions;
@@ -200,6 +202,8 @@ namespace N64Recomp {
         // The target ROM being recompiled, TODO move this outside of the context to avoid making a copy for mod contexts.
         // Used for reading relocations and for the output binary feature.
         std::vector<uint8_t> rom;
+        // Whether to skip validating reference symbols when emitting function calls during recompilation.
+        bool skip_validating_reference_symbols = true;
 
         //// Only used by the CLI, TODO move this to a struct in the internal headers.
         // A mapping of function name to index in the functions vector
@@ -372,6 +376,9 @@ namespace N64Recomp {
         }
 
         bool is_reference_section_relocatable(uint16_t section_index) const {
+            if (all_reference_sections_relocatable) {
+                return true;
+            }
             if (section_index == SectionAbsolute) {
                 return false;
             }
@@ -531,6 +538,10 @@ namespace N64Recomp {
         void copy_reference_sections_from(const Context& rhs) {
             reference_sections = rhs.reference_sections;
         }
+
+        void set_all_reference_sections_relocatable() {
+            all_reference_sections_relocatable = true;
+        }
     };
 
     class Generator;
diff --git a/include/recompiler/generator.h b/include/recompiler/generator.h
index 0fb3bda..99dd002 100644
--- a/include/recompiler/generator.h
+++ b/include/recompiler/generator.h
@@ -34,7 +34,9 @@ namespace N64Recomp {
         virtual void emit_function_end() const = 0;
         virtual void emit_function_call_lookup(uint32_t addr) const = 0;
         virtual void emit_function_call_by_register(int reg) const = 0;
-        virtual void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const = 0;
+        // target_section_offset can be deduced from symbol_index if the full context is available,
+        // but for live recompilation the reference symbol list is unavailable, so it's still provided.
+        virtual void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const = 0;
         virtual void emit_function_call(const Context& context, size_t function_index) const = 0;
         virtual void emit_goto(const std::string& target) const = 0;
         virtual void emit_label(const std::string& label_name) const = 0;
@@ -70,7 +72,7 @@ namespace N64Recomp {
         void emit_function_end() const final;
         void emit_function_call_lookup(uint32_t addr) const final;
         void emit_function_call_by_register(int reg) const final;
-        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const final;
+        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
         void emit_function_call(const Context& context, size_t function_index) const final;
         void emit_goto(const std::string& target) const final;
         void emit_label(const std::string& label_name) const final;
diff --git a/include/recompiler/live_recompiler.h b/include/recompiler/live_recompiler.h
index 91fd60d..7b14107 100644
--- a/include/recompiler/live_recompiler.h
+++ b/include/recompiler/live_recompiler.h
@@ -1,6 +1,7 @@
 #ifndef __LIVE_RECOMPILER_H__
 #define __LIVE_RECOMPILER_H__
 
+#include <unordered_map>
 #include "recompiler/generator.h"
 #include "recomp.h"
 
@@ -8,6 +9,10 @@ struct sljit_compiler;
 
 namespace N64Recomp {
     struct LiveGeneratorContext;
+    struct ReferenceJumpDetails {
+        uint16_t section;
+        uint32_t section_offset;
+    };
     struct LiveGeneratorOutput {
         LiveGeneratorOutput() = default;
         LiveGeneratorOutput(const LiveGeneratorOutput& rhs) = delete;
@@ -20,29 +25,46 @@ namespace N64Recomp {
             code = rhs.code;
             code_size = rhs.code_size;
             functions = std::move(rhs.functions);
+            reference_symbol_jumps = std::move(rhs.reference_symbol_jumps);
+            import_jumps_by_index = std::move(rhs.import_jumps_by_index);
+            executable_offset = rhs.executable_offset;
 
             rhs.good = false;
-            rhs.string_literals.clear();
-            rhs.jump_tables.clear();
             rhs.code = nullptr;
             rhs.code_size = 0;
+            rhs.reference_symbol_jumps.clear();
+            rhs.executable_offset = 0;
 
             return *this;
         }
         ~LiveGeneratorOutput();
+        size_t num_reference_symbol_jumps() const;
+        void set_reference_symbol_jump(size_t jump_index, recomp_func_t* func);
+        ReferenceJumpDetails get_reference_symbol_jump_details(size_t jump_index);
+        void populate_import_symbol_jumps(size_t import_index, recomp_func_t* func);
         bool good = false;
-        // Storage for string literals referenced by recompiled code. These must be manually allocated to prevent
-        // them from moving, as the referenced address is baked into the recompiled code.
-        std::vector<const char*> string_literals;
-        // Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These must also be manually allocated
-        // for the same reason as strings.
-        std::vector<void**> jump_tables;
+        // Storage for string literals referenced by recompiled code. These are allocated as unique_ptr arrays
+        // to prevent them from moving, as the referenced address is baked into the recompiled code.
+        std::vector<std::unique_ptr<char[]>> string_literals;
+        // Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These are also
+        // allocated as unique_ptr arrays for the same reason as strings.
+        std::vector<std::unique_ptr<void*[]>> jump_tables;
         // Recompiled code.
         void* code;
         // Size of the recompiled code.
         size_t code_size;
         // Pointers to each individual function within the recompiled code.
         std::vector<recomp_func_t*> functions;
+    private:
+        // List of jump details and the corresponding jump instruction address. These jumps get populated after
+        // recompilation is complete, during dependency resolution.
+        std::vector<std::pair<ReferenceJumpDetails, void*>> reference_symbol_jumps;
+        // Mapping of import symbol index to any jumps to that import symbol.
+        std::unordered_multimap<size_t, void*> import_jumps_by_index;
+        // sljit executable offset.
+        int64_t executable_offset;
+
+        friend class LiveGenerator;
     };
     struct LiveGeneratorInputs {
         uint32_t base_event_index;
@@ -75,7 +97,7 @@ namespace N64Recomp {
         void emit_function_end() const final;
         void emit_function_call_lookup(uint32_t addr) const final;
         void emit_function_call_by_register(int reg) const final;
-        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const final;
+        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
         void emit_function_call(const Context& context, size_t function_index) const final;
         void emit_goto(const std::string& target) const final;
         void emit_label(const std::string& label_name) const final;
@@ -103,7 +125,7 @@ namespace N64Recomp {
         void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
         void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
         void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
-        // Loads the relocated address specified by the instruction context into R0.
+        // Loads the relocated address specified by the instruction context into the target register.
         void load_relocated_address(const InstructionContext& ctx, int reg) const;
         sljit_compiler* compiler;
         LiveGeneratorInputs inputs;
diff --git a/include/recompiler/operations.h b/include/recompiler/operations.h
index 4e7cd67..2f0a30b 100644
--- a/include/recompiler/operations.h
+++ b/include/recompiler/operations.h
@@ -99,6 +99,12 @@ namespace N64Recomp {
         LessEq,
         Greater,
         GreaterEq,
+        EqualF32,
+        LessF32,
+        LessEqF32,
+        EqualF64,
+        LessF64,
+        LessEqF64,
         // Loads
         LD,
         LW,
diff --git a/src/cgenerator.cpp b/src/cgenerator.cpp
index 1ca565d..d3ebacc 100644
--- a/src/cgenerator.cpp
+++ b/src/cgenerator.cpp
@@ -45,9 +45,15 @@ static std::vector<BinaryOpFields> c_op_fields = []() {
     setup_op(N64Recomp::BinaryOpType::Sra32, "S32", ">>"); // Arithmetic aspect will be taken care of by unary op for first operand.
     setup_op(N64Recomp::BinaryOpType::Sra64, "", ">>"); // Arithmetic aspect will be taken care of by unary op for first operand.
     setup_op(N64Recomp::BinaryOpType::Equal, "", "==");
+    setup_op(N64Recomp::BinaryOpType::EqualF32, "", "==");
+    setup_op(N64Recomp::BinaryOpType::EqualF64, "", "==");
     setup_op(N64Recomp::BinaryOpType::NotEqual, "", "!=");
     setup_op(N64Recomp::BinaryOpType::Less, "", "<");
+    setup_op(N64Recomp::BinaryOpType::LessF32, "", "<");
+    setup_op(N64Recomp::BinaryOpType::LessF64, "", "<");
     setup_op(N64Recomp::BinaryOpType::LessEq, "", "<=");
+    setup_op(N64Recomp::BinaryOpType::LessEqF32, "", "<=");
+    setup_op(N64Recomp::BinaryOpType::LessEqF64, "", "<=");
     setup_op(N64Recomp::BinaryOpType::Greater, "", ">");
     setup_op(N64Recomp::BinaryOpType::GreaterEq, "", ">=");
     setup_op(N64Recomp::BinaryOpType::LD, "LD", "");
@@ -407,7 +413,8 @@ void N64Recomp::CGenerator::emit_function_call_by_register(int reg) const {
     fmt::print(output_file, "LOOKUP_FUNC({})(rdram, ctx);\n", gpr_to_string(reg));
 }
 
-void N64Recomp::CGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const {
+void N64Recomp::CGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const {
+    (void)target_section_offset;
     const N64Recomp::ReferenceSymbol& sym = context.get_reference_symbol(section_index, symbol_index);
     fmt::print(output_file, "{}(rdram, ctx);\n", sym.name);
 }
diff --git a/src/operations.cpp b/src/operations.cpp
index bb2b325..e268717 100644
--- a/src/operations.cpp
+++ b/src/operations.cpp
@@ -99,33 +99,33 @@ namespace N64Recomp {
     { InstrId::cpu_div_s, { BinaryOpType::DivFloat, Operand::Fd, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true, true } },
     { InstrId::cpu_div_d, { BinaryOpType::DivDouble, Operand::FdDouble, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true, true } },
     // Float comparisons TODO remaining operations and investigate ordered/unordered and default values
-    { InstrId::cpu_c_lt_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_nge_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_olt_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_ult_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_lt_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_nge_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_olt_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_ult_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_lt_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_nge_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_olt_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_ult_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_lt_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_nge_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_olt_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_ult_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
 
-    { InstrId::cpu_c_le_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_ngt_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_ole_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_ule_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_le_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_ngt_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_ole_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_ule_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_le_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_ngt_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_ole_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_ule_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_le_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_ngt_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_ole_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_ule_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
 
-    { InstrId::cpu_c_eq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_ueq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_ngl_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_seq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
-    { InstrId::cpu_c_eq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_ueq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
-    { InstrId::cpu_c_ngl_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_eq_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_ueq_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_ngl_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_seq_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
+    { InstrId::cpu_c_eq_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_ueq_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_ngl_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
     /* TODO rename to c_seq_d when fixed in rabbitizer */
-    { InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
+    { InstrId::cpu_c_deq_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
     // Loads
     { InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
     { InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
diff --git a/src/recompilation.cpp b/src/recompilation.cpp
index 0988bf8..8500d53 100644
--- a/src/recompilation.cpp
+++ b/src/recompilation.cpp
@@ -28,7 +28,6 @@ JalResolutionResult resolve_jal(const N64Recomp::Context& context, size_t cur_se
     uint32_t section_vram_start = cur_section.ram_addr;
     uint32_t section_vram_end = cur_section.ram_addr + cur_section.size;
     bool in_current_section = target_func_vram >= section_vram_start && target_func_vram < section_vram_end;
-    bool needs_static = false;
     bool exact_match_found = false;
 
     // Use a thread local to prevent reallocation across runs and to allow multi-threading in the future.
@@ -183,9 +182,9 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
             // Don't try to relocate special section symbols.
             if (context.is_regular_reference_section(reloc.target_section) || reloc_section == N64Recomp::SectionAbsolute) {
                 bool ref_section_relocatable = context.is_reference_section_relocatable(reloc.target_section);
-                uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
                 // Resolve HI16 and LO16 reference symbol relocs to non-relocatable sections by patching the instruction immediate.
                 if (!ref_section_relocatable && (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16)) {
+                    uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
                     uint32_t full_immediate = reloc.target_section_offset + ref_section_vram;
 
                     if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16) {
@@ -264,7 +263,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
         return true;
     };
 
-    auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &output_file, &print_link_branch]
+    auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &print_link_branch]
         (uint32_t target_func_vram, bool tail_call = false, bool indent = false)
     {
         bool call_by_lookup = false;
@@ -286,16 +285,17 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
         std::string jal_target_name{};
         size_t matched_func_index = (size_t)-1;
         if (reloc_reference_symbol != (size_t)-1) {
-            const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
-
             if (reloc_type != N64Recomp::RelocType::R_MIPS_26) {
                 fmt::print(stderr, "Unsupported reloc type {} on jal instruction in {}\n", (int)reloc_type, func.name);
                 return false;
             }
 
-            if (ref_symbol.section_offset != reloc_target_section_offset) {
-                fmt::print(stderr, "Function {} uses a MIPS_R_26 addend, which is not supported yet\n", func.name);
-                return false;
+            if (!context.skip_validating_reference_symbols) {
+                const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
+                if (ref_symbol.section_offset != reloc_target_section_offset) {
+                    fmt::print(stderr, "Function {} uses an R_MIPS_26 addend, which is not supported yet\n", func.name);
+                    return false;
+                }
             }
         }
         else {
@@ -336,7 +336,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
         }
         print_indent();
         if (reloc_reference_symbol != (size_t)-1) {
-            generator.emit_function_call_reference_symbol(context, reloc_section, reloc_reference_symbol);
+            generator.emit_function_call_reference_symbol(context, reloc_section, reloc_reference_symbol, reloc_target_section_offset);
         }
         else if (call_by_lookup) {
            generator.emit_function_call_lookup(target_func_vram);
@@ -392,7 +392,6 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
 
     int rd = (int)instr.GetO32_rd();
     int rs = (int)instr.GetO32_rs();
-    int base = rs;
     int rt = (int)instr.GetO32_rt();
     int sa = (int)instr.Get_sa();
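
Usage note: the rewritable calls recorded by emit_function_call_reference_symbol are meant to be patched after LiveGenerator::finish() returns, once dependencies are resolved. The sketch below shows one plausible driver loop for that pass; lookup_func_by_section_offset and lookup_import_func are hypothetical stand-ins for whatever symbol resolution the embedding runtime provides, and only the LiveGeneratorOutput calls come from this diff.

    #include "recompiler/live_recompiler.h"

    // Hypothetical runtime-side lookups; not part of this diff.
    recomp_func_t* lookup_func_by_section_offset(uint16_t section, uint32_t offset);
    recomp_func_t* lookup_import_func(size_t import_index);

    // Patch every rewritable call recorded during live recompilation.
    bool resolve_live_output_calls(N64Recomp::LiveGeneratorOutput& output, size_t num_imports) {
        for (size_t i = 0; i < output.num_reference_symbol_jumps(); i++) {
            N64Recomp::ReferenceJumpDetails details = output.get_reference_symbol_jump_details(i);
            recomp_func_t* func = lookup_func_by_section_offset(details.section, details.section_offset);
            if (func == nullptr) {
                return false; // Unresolved reference symbol.
            }
            // Rewrites the recorded call's target in place via sljit_set_jump_addr.
            output.set_reference_symbol_jump(i, func);
        }
        for (size_t import_index = 0; import_index < num_imports; import_index++) {
            if (recomp_func_t* func = lookup_import_func(import_index)) {
                // Patches every call site recorded under this import index.
                output.populate_import_symbol_jumps(import_index, func);
            }
        }
        return true;
    }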
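
Relatedly, the new Context knobs are what route calls down the rewritable-jump path in the first place. A minimal sketch of caller-side setup, assuming the caller builds its own Context (illustrative, not code from this diff):

    #include "recompiler/context.h"

    N64Recomp::Context make_live_context() {
        N64Recomp::Context context{};
        // Treat every reference section as relocatable so that calls into them go through
        // emit_function_call_reference_symbol and become rewritable jumps, instead of being
        // resolved to fixed addresses at recompile time.
        context.set_all_reference_sections_relocatable();
        // skip_validating_reference_symbols defaults to true, which matters here: the live
        // path has no reference symbol list to validate R_MIPS_26 addends against.
        return context;
    }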