From cc5294efa4b22cc5565dc528df7a94ab3c2795a1 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Sun, 13 Oct 2024 01:47:39 -0400 Subject: [PATCH] Implement switch statements and special function calls (break, syscall, trigger event) in live recompiler --- LiveRecomp/live_generator.cpp | 184 +++++++++++++++++++++++---- LiveRecomp/live_recompiler_test.cpp | 5 + include/recompiler/context.h | 13 ++ include/recompiler/generator.h | 12 +- include/recompiler/live_recompiler.h | 23 +++- src/analysis.h | 13 -- src/cgenerator.cpp | 15 ++- src/recompilation.cpp | 40 +++--- 8 files changed, 237 insertions(+), 68 deletions(-) diff --git a/LiveRecomp/live_generator.cpp b/LiveRecomp/live_generator.cpp index 130a645..dffd83f 100644 --- a/LiveRecomp/live_generator.cpp +++ b/LiveRecomp/live_generator.cpp @@ -10,6 +10,8 @@ #include "sljitLir.h" +constexpr uint64_t rdram_offset = 0xFFFFFFFF80000000ULL; + void N64Recomp::live_recompiler_init() { RabbitizerConfig_Cfg.pseudos.pseudoMove = false; RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false; @@ -19,7 +21,7 @@ void N64Recomp::live_recompiler_init() { } namespace Registers { - constexpr int rdram = SLJIT_S0; // stores (rdram - 0xFFFFFFFF80000000) + constexpr int rdram = SLJIT_S0; // stores (rdram - rdram_offset) constexpr int ctx = SLJIT_S1; // stores ctx constexpr int c1cs = SLJIT_S2; // stores ctx constexpr int hi = SLJIT_S3; // stores ctx @@ -40,11 +42,22 @@ struct ReferenceSymbolCall { uint16_t reference; }; +struct SwitchErrorJump { + uint32_t instr_vram; + uint32_t jtbl_vram; + sljit_jump* jump; +}; + struct N64Recomp::LiveGeneratorContext { + std::string function_name; std::unordered_map labels; std::unordered_map> pending_jumps; std::vector func_labels; std::vector inner_calls; + std::vector> switch_jump_labels; + // See LiveGeneratorOutput::jump_tables for info. 
+    std::vector<void**> jump_tables;
+    std::vector<SwitchErrorJump> switch_error_jumps;
     sljit_jump* cur_branch_jump;
 };
 
@@ -77,6 +90,47 @@ N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
         sljit_set_label(call.jump, target_func_label);
     }
 
+    // Generate the switch error jump targets and assign the jump labels.
+    if (!context->switch_error_jumps.empty()) {
+        // Allocate the function name and place it in the literals.
+        char* func_name = new char[context->function_name.size() + 1];
+        memcpy(func_name, context->function_name.c_str(), context->function_name.size());
+        func_name[context->function_name.size()] = '\x00';
+        ret.string_literals.emplace_back(func_name);
+
+        std::vector<sljit_jump*> switch_error_return_jumps{};
+        switch_error_return_jumps.resize(context->switch_error_jumps.size());
+
+        // Generate and assign the labels for the switch error jumps.
+        for (size_t i = 0; i < context->switch_error_jumps.size(); i++) {
+            const auto& cur_error_jump = context->switch_error_jumps[i];
+
+            // Generate a label and assign it to the jump.
+            sljit_set_label(cur_error_jump.jump, sljit_emit_label(compiler));
+
+            // Load the arguments (function name, vram, jump table address)
+            sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sljit_sw(func_name));
+            sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, sljit_sw(cur_error_jump.instr_vram));
+            sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, sljit_sw(cur_error_jump.jtbl_vram));
+
+            // Call switch_error.
+            sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3V(P, 32, 32), SLJIT_IMM, sljit_sw(inputs.switch_error));
+
+            // Jump to the return statement.
+            switch_error_return_jumps[i] = sljit_emit_jump(compiler, SLJIT_JUMP);
+        }
+
+        // Generate the return statement.
+        sljit_label* return_label = sljit_emit_label(compiler);
+        sljit_emit_return_void(compiler);
+
+        // Assign the label for all the return jumps.
+        for (sljit_jump* cur_jump : switch_error_return_jumps) {
+            sljit_set_label(cur_jump, return_label);
+        }
+    }
+    context->switch_error_jumps.clear();
+
     // Generate the code.
     ret.code = sljit_generate_code(compiler, 0, NULL);
     ret.code_size = sljit_get_generated_code_size(compiler);
@@ -92,6 +146,34 @@ N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
         }
     }
 
+    // Populate all the switch case addresses.
+    bool invalid_switch = false;
+    for (size_t switch_index = 0; switch_index < context->switch_jump_labels.size(); switch_index++) {
+        const std::vector<std::string>& cur_labels = context->switch_jump_labels[switch_index];
+        void** cur_jump_table = context->jump_tables[switch_index];
+        for (size_t case_index = 0; case_index < cur_labels.size(); case_index++) {
+            // Find the label.
+            auto find_it = context->labels.find(cur_labels[case_index]);
+            if (find_it == context->labels.end()) {
+                // Label not found, invalid switch.
+                // Don't return immediately, as we need to ensure that all the jump tables end up in ret
+                // so that it cleans them up in its destructor.
+                invalid_switch = true;
+                break;
+            }
+
+            // Get the label's address and place it in the jump table.
+            cur_jump_table[case_index] = reinterpret_cast<void*>(sljit_get_label_addr(find_it->second));
+        }
+        ret.jump_tables.emplace_back(cur_jump_table);
+    }
+    context->switch_jump_labels.clear();
+    context->jump_tables.clear();
+
+    if (invalid_switch) {
+        return { };
+    }
+
     sljit_free_compiler(compiler);
     compiler = nullptr;
 
@@ -101,10 +183,18 @@ N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
 N64Recomp::LiveGeneratorOutput::~LiveGeneratorOutput() {
     if (code != nullptr) {
         sljit_free_code(code, nullptr);
+        code = nullptr;
     }
+
     for (const char* literal : string_literals) {
         delete[] literal;
     }
+    string_literals.clear();
+
+    for (void** jump_table : jump_tables) {
+        delete[] jump_table;
+    }
+    jump_tables.clear();
 }
 
 constexpr int get_gpr_context_offset(int gpr_index) {
@@ -703,9 +793,10 @@ void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const Instruc
 }
 
 void N64Recomp::LiveGenerator::emit_function_start(const std::string& function_name, size_t func_index) const {
+    context->function_name = function_name;
     context->func_labels[func_index] = sljit_emit_label(compiler);
     sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4, 5, 0);
-    sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, 0xFFFFFFFF80000000);
+    sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
 }
 
 void N64Recomp::LiveGenerator::emit_function_end() const {
@@ -723,7 +814,7 @@ void N64Recomp::LiveGenerator::emit_function_call_lookup(uint32_t addr) const {
     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_R0, 0);
 
     // Load rdram and ctx into R0 and R1.
-    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, 0xFFFFFFFF80000000);
+    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
 
     // Call the function.
@@ -741,7 +832,7 @@ void N64Recomp::LiveGenerator::emit_function_call_by_register(int reg) const { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_R0, 0); // Load rdram and ctx into R0 and R1. - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, 0xFFFFFFFF80000000); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0); // Call the function. @@ -754,8 +845,10 @@ void N64Recomp::LiveGenerator::emit_function_call_reference_symbol(const Context } void N64Recomp::LiveGenerator::emit_function_call(const Context& recompiler_context, size_t function_index) const { - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, 0xFFFFFFFF80000000); + // Load rdram and ctx into R0 and R1. + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0); + // Call the function and save the jump to set its label later on. sljit_jump* call_jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2V(P, P)); context->inner_calls.emplace_back(InnerCall{ .target_func_index = function_index, .jump = call_jump }); } @@ -790,8 +883,8 @@ void N64Recomp::LiveGenerator::emit_label(const std::string& label_name) const { context->labels.emplace(label_name, label); } -void N64Recomp::LiveGenerator::emit_variable_declaration(const std::string& var_name, int reg) const { - assert(false); +void N64Recomp::LiveGenerator::emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const { + // Nothing to do here, the live recompiler performs a subtraction to get the switch's case. 
 }
 
 void N64Recomp::LiveGenerator::emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const {
@@ -869,20 +962,52 @@ void N64Recomp::LiveGenerator::emit_branch_close() const {
     context->cur_branch_jump = nullptr;
 }
 
-void N64Recomp::LiveGenerator::emit_switch(const std::string& jump_variable, int shift_amount) const {
-    assert(false);
+void N64Recomp::LiveGenerator::emit_switch(const JumpTable& jtbl, int reg) const {
+    // Populate the switch's labels.
+    std::vector<std::string> cur_labels{};
+    cur_labels.resize(jtbl.entries.size());
+    for (size_t i = 0; i < cur_labels.size(); i++) {
+        cur_labels[i] = fmt::format("L_{:08X}", jtbl.entries[i]);
+    }
+    context->switch_jump_labels.emplace_back(std::move(cur_labels));
+
+    // Allocate the jump table. Must be manually allocated to prevent the address from changing.
+    void** cur_jump_table = new void*[jtbl.entries.size()];
+    context->jump_tables.emplace_back(cur_jump_table);
+
+    /// Codegen
+
+    // Load the jump target register. The lw instruction was patched into an addiu, so this holds
+    // the address of the jump table entry instead of the actual jump target.
+    sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp1, 0, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(reg));
+    // Subtract the jump table's address from the jump target to get the jump table addend.
+    // Sign extend the jump table address to 64 bits so that the entire register's contents are used instead of just the lower 32 bits.
+    sljit_emit_op2(compiler, SLJIT_SUB, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_IMM, (sljit_sw)((int32_t)jtbl.vram));
+
+    // Bounds check the addend. If it's greater than or equal to the jump table size (entries * sizeof(u32)) then jump to the switch error.
+ sljit_jump* switch_error_jump = sljit_emit_cmp(compiler, SLJIT_GREATER_EQUAL, Registers::arithmetic_temp1, 0, SLJIT_IMM, jtbl.entries.size() * sizeof(uint32_t)); + context->switch_error_jumps.emplace_back(SwitchErrorJump{.instr_vram = jtbl.jr_vram, .jtbl_vram = jtbl.vram, .jump = switch_error_jump}); + + // Multiply the jump table addend by 2 to get the addend for the real jump table. (4 bytes per entry to 8 bytes per entry). + sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0); + // Load the real jump table address. + sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp2, 0, SLJIT_IMM, (sljit_sw)cur_jump_table); + // Load the real jump entry. + sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp1, 0, SLJIT_MEM2(Registers::arithmetic_temp1, Registers::arithmetic_temp2), 0); + // Jump to the loaded entry. + sljit_emit_ijump(compiler, SLJIT_JUMP, Registers::arithmetic_temp1, 0); } void N64Recomp::LiveGenerator::emit_case(int case_index, const std::string& target_label) const { - assert(false); + // Nothing to do here, the jump table is built in emit_switch. } void N64Recomp::LiveGenerator::emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const { - assert(false); + // Nothing to do here, the jump table is built in emit_switch. } void N64Recomp::LiveGenerator::emit_switch_close() const { - assert(false); + // Nothing to do here, the jump table is built in emit_switch. } void N64Recomp::LiveGenerator::emit_return() const { @@ -1005,11 +1130,8 @@ void N64Recomp::LiveGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) } // If the denominator is 0, skip the division and jump the special handling for that case. - // Set the zero flag if the denominator is zero by AND'ing it with itself. - sljit_emit_op2u(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0); - - // Branch past the division if the zero flag is 0. 
-    sljit_jump* jump_skip_division = sljit_emit_jump(compiler, SLJIT_ZERO);
+    // Branch past the division if the divisor is 0.
+    sljit_jump* jump_skip_division = sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
 
     // Perform the division.
     sljit_emit_op0(compiler, div_opcode);
@@ -1078,19 +1200,37 @@ void N64Recomp::LiveGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2)
 }
 
 void N64Recomp::LiveGenerator::emit_syscall(uint32_t instr_vram) const {
-    assert(false);
+    // Load rdram and ctx into R0 and R1.
+    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
+    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
+    // Load the vram into R2.
+    sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, instr_vram);
+    // Call syscall_handler.
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3V(P, P, 32), SLJIT_IMM, sljit_sw(inputs.syscall_handler));
 }
 
 void N64Recomp::LiveGenerator::emit_do_break(uint32_t instr_vram) const {
-    assert(false);
+    // Load the vram into R0.
+    sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, instr_vram);
+    // Call do_break.
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1V(32), SLJIT_IMM, sljit_sw(inputs.do_break));
 }
 
 void N64Recomp::LiveGenerator::emit_pause_self() const {
-    assert(false);
+    // Load rdram into R0.
+    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
+    // Call pause_self.
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1V(P), SLJIT_IMM, sljit_sw(inputs.pause_self));
 }
 
-void N64Recomp::LiveGenerator::emit_trigger_event(size_t event_index) const {
-    assert(false);
+void N64Recomp::LiveGenerator::emit_trigger_event(uint32_t event_index) const {
+    // Load rdram and ctx into R0 and R1.
+    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
+    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
+    // Load the global event index into R2.
+    sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, event_index + inputs.base_event_index);
+    // Call trigger_event. Signature is void(uint8_t* rdram, recomp_context* ctx, uint32_t event_index),
+    // so the argument descriptor must be ARGS3V(P, P, 32), not ARGS1V(P).
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3V(P, P, 32), SLJIT_IMM, sljit_sw(inputs.trigger_event));
 }
 
 void N64Recomp::LiveGenerator::emit_comment(const std::string& comment) const {
diff --git a/LiveRecomp/live_recompiler_test.cpp b/LiveRecomp/live_recompiler_test.cpp
index 900e92d..ee537f5 100644
--- a/LiveRecomp/live_recompiler_test.cpp
+++ b/LiveRecomp/live_recompiler_test.cpp
@@ -78,6 +78,10 @@ recomp_func_t* test_get_function(int32_t vram) {
     return nullptr;
 }
 
+void test_switch_error(const char* func, uint32_t vram, uint32_t jtbl) {
+    printf(" Switch-case out of bounds in %s at 0x%08X for jump table at 0x%08X\n", func, vram, jtbl);
+}
+
 TestStats run_test(const std::filesystem::path& tests_dir, const std::string& test_name) {
     std::filesystem::path input_path = tests_dir / (test_name + "_data.bin");
     std::filesystem::path data_dump_path = tests_dir / (test_name + "_data_out.bin");
@@ -199,6 +203,7 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
     auto before_codegen = std::chrono::system_clock::now();
 
     N64Recomp::LiveGeneratorInputs generator_inputs {
+        .switch_error = test_switch_error,
         .get_function = test_get_function,
     };
 
diff --git a/include/recompiler/context.h b/include/recompiler/context.h
index b335849..e355a6a 100644
--- a/include/recompiler/context.h
+++ b/include/recompiler/context.h
@@ -36,6 +36,19 @@ namespace N64Recomp {
             : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {}
         Function() = default;
     };
+
+    struct JumpTable {
+        uint32_t vram;
+        uint32_t addend_reg;
+        uint32_t rom;
+        uint32_t lw_vram;
+        uint32_t addu_vram;
+        uint32_t jr_vram;
+        std::vector<uint32_t> entries;
+
+        JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, std::vector<uint32_t>&& entries)
+            : vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), entries(std::move(entries)) {}
+    };
 
     enum class RelocType : uint8_t {
         R_MIPS_NONE = 0,
diff --git a/include/recompiler/generator.h b/include/recompiler/generator.h
index fa0acf0..0fb3bda 100644
--- a/include/recompiler/generator.h
+++ b/include/recompiler/generator.h
@@ -38,10 +38,10 @@ namespace N64Recomp {
         virtual void emit_function_call(const Context& context, size_t function_index) const = 0;
         virtual void emit_goto(const std::string& target) const = 0;
         virtual void emit_label(const std::string& label_name) const = 0;
-        virtual void emit_variable_declaration(const std::string& var_name, int reg) const = 0;
+        virtual void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const = 0;
         virtual void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0;
         virtual void emit_branch_close() const = 0;
-        virtual void emit_switch(const std::string& jump_variable, int shift_amount) const = 0;
+        virtual void emit_switch(const JumpTable& jtbl, int reg) const = 0;
         virtual void emit_case(int case_index, const std::string& target_label) const = 0;
         virtual void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const = 0;
         virtual void emit_switch_close() const = 0;
@@ -56,7 +56,7 @@ namespace N64Recomp {
         virtual void emit_syscall(uint32_t instr_vram) const = 0;
         virtual void emit_do_break(uint32_t instr_vram) const = 0;
         virtual void emit_pause_self() const = 0;
-        virtual void emit_trigger_event(size_t event_index) const = 0;
+        virtual void emit_trigger_event(uint32_t event_index) const = 0;
         virtual void emit_comment(const std::string& comment) const = 0;
     };
 
@@ -74,10 +74,10 @@
namespace N64Recomp { void emit_function_call(const Context& context, size_t function_index) const final; void emit_goto(const std::string& target) const final; void emit_label(const std::string& label_name) const final; - void emit_variable_declaration(const std::string& var_name, int reg) const final; + void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const final; void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const final; void emit_branch_close() const final; - void emit_switch(const std::string& jump_variable, int shift_amount) const final; + void emit_switch(const JumpTable& jtbl, int reg) const final; void emit_case(int case_index, const std::string& target_label) const final; void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const final; void emit_switch_close() const final; @@ -92,7 +92,7 @@ namespace N64Recomp { void emit_syscall(uint32_t instr_vram) const final; void emit_do_break(uint32_t instr_vram) const final; void emit_pause_self() const final; - void emit_trigger_event(size_t event_index) const final; + void emit_trigger_event(uint32_t event_index) const final; void emit_comment(const std::string& comment) const final; private: void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const; diff --git a/include/recompiler/live_recompiler.h b/include/recompiler/live_recompiler.h index bc81601..76fa812 100644 --- a/include/recompiler/live_recompiler.h +++ b/include/recompiler/live_recompiler.h @@ -15,11 +15,15 @@ namespace N64Recomp { LiveGeneratorOutput& operator=(const LiveGeneratorOutput& rhs) = delete; LiveGeneratorOutput& operator=(LiveGeneratorOutput&& rhs) { good = rhs.good; - functions = std::move(rhs.functions); + string_literals = std::move(rhs.string_literals); + jump_tables = std::move(rhs.jump_tables); code = rhs.code; code_size = rhs.code_size; + functions = std::move(rhs.functions); rhs.good = 
false; + rhs.string_literals.clear(); + rhs.jump_tables.clear(); rhs.code = nullptr; rhs.code_size = 0; @@ -30,6 +34,9 @@ namespace N64Recomp { // Storage for string literals referenced by recompiled code. These must be manually allocated to prevent // them from moving, as the referenced address is baked into the recompiled code. std::vector string_literals; + // Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These must also be manually allocated + // for the same reason as strings. + std::vector jump_tables; // Recompiled code. void* code; // Size of the recompiled code. @@ -38,6 +45,7 @@ namespace N64Recomp { std::vector functions; }; struct LiveGeneratorInputs { + uint32_t base_event_index; void (*cop0_status_write)(recomp_context* ctx, gpr value); gpr (*cop0_status_read)(recomp_context* ctx); void (*switch_error)(const char* func, uint32_t vram, uint32_t jtbl); @@ -45,11 +53,18 @@ namespace N64Recomp { recomp_func_t* (*get_function)(int32_t vram); void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram); void (*pause_self)(uint8_t* rdram); + void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index); }; class LiveGenerator final : public Generator { public: LiveGenerator(size_t num_funcs, const LiveGeneratorInputs& inputs); ~LiveGenerator(); + // Prevent moving or copying. 
+ LiveGenerator(const LiveGenerator& rhs) = delete; + LiveGenerator(LiveGenerator&& rhs) = delete; + LiveGenerator& operator=(const LiveGenerator& rhs) = delete; + LiveGenerator& operator=(LiveGenerator&& rhs) = delete; + LiveGeneratorOutput finish(); void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const final; void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const final; @@ -62,10 +77,10 @@ namespace N64Recomp { void emit_function_call(const Context& context, size_t function_index) const final; void emit_goto(const std::string& target) const final; void emit_label(const std::string& label_name) const final; - void emit_variable_declaration(const std::string& var_name, int reg) const final; + void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const final; void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const final; void emit_branch_close() const final; - void emit_switch(const std::string& jump_variable, int shift_amount) const final; + void emit_switch(const JumpTable& jtbl, int reg) const final; void emit_case(int case_index, const std::string& target_label) const final; void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const final; void emit_switch_close() const final; @@ -80,7 +95,7 @@ namespace N64Recomp { void emit_syscall(uint32_t instr_vram) const final; void emit_do_break(uint32_t instr_vram) const final; void emit_pause_self() const final; - void emit_trigger_event(size_t event_index) const final; + void emit_trigger_event(uint32_t event_index) const final; void emit_comment(const std::string& comment) const final; private: void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const; diff --git a/src/analysis.h b/src/analysis.h index 44a8f4c..9e0562e 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -7,19 +7,6 @@ #include "recompiler/context.h" namespace N64Recomp 
{ - struct JumpTable { - uint32_t vram; - uint32_t addend_reg; - uint32_t rom; - uint32_t lw_vram; - uint32_t addu_vram; - uint32_t jr_vram; - std::vector entries; - - JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, std::vector&& entries) - : vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), entries(std::move(entries)) {} - }; - struct AbsoluteJump { uint32_t jump_target; uint32_t instruction_vram; diff --git a/src/cgenerator.cpp b/src/cgenerator.cpp index 5d6a6bc..ee6a819 100644 --- a/src/cgenerator.cpp +++ b/src/cgenerator.cpp @@ -403,8 +403,9 @@ void N64Recomp::CGenerator::emit_label(const std::string& label_name) const { "{}:\n", label_name); } -void N64Recomp::CGenerator::emit_variable_declaration(const std::string& var_name, int reg) const { - fmt::print(output_file, "gpr {} = {};\n", var_name, gpr_to_string(reg)); +void N64Recomp::CGenerator::emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const { + std::string jump_variable = fmt::format("jr_addend_{:08X}", jtbl.jr_vram); + fmt::print(output_file, "gpr {} = {};\n", jump_variable, gpr_to_string(reg)); } void N64Recomp::CGenerator::emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const { @@ -423,8 +424,12 @@ void N64Recomp::CGenerator::emit_switch_close() const { fmt::print(output_file, "}}\n"); } -void N64Recomp::CGenerator::emit_switch(const std::string& jump_variable, int shift_amount) const { - fmt::print(output_file, "switch ({} >> {}) {{\n", jump_variable, shift_amount); +void N64Recomp::CGenerator::emit_switch(const JumpTable& jtbl, int reg) const { + // TODO generate code to subtract the jump table address from the register's value instead. + // Once that's done, the addend temp can be deleted to simplify the generator interface. 
+ std::string jump_variable = fmt::format("jr_addend_{:08X}", jtbl.jr_vram); + + fmt::print(output_file, "switch ({} >> 2) {{\n", jump_variable); } void N64Recomp::CGenerator::emit_case(int case_index, const std::string& target_label) const { @@ -508,7 +513,7 @@ void N64Recomp::CGenerator::emit_pause_self() const { fmt::print(output_file, "pause_self(rdram);\n"); } -void N64Recomp::CGenerator::emit_trigger_event(size_t event_index) const { +void N64Recomp::CGenerator::emit_trigger_event(uint32_t event_index) const { fmt::print(output_file, "recomp_trigger_event(rdram, ctx, base_event_index + {});\n", event_index); } diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 2717545..e28a268 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -110,7 +110,7 @@ std::string_view ctx_gpr_prefix(int reg) { } template -bool process_instruction(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set& skipped_insns, size_t instr_index, const std::vector& instructions, std::ostream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span> static_funcs_out) { +bool process_instruction(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set& jtbl_lw_instructions, size_t instr_index, const std::vector& instructions, std::ostream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span> static_funcs_out) { using namespace N64Recomp; const auto& section = context.sections[func.section_index]; @@ -118,6 +118,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con needs_link_branch = false; 
is_branch_likely = false; uint32_t instr_vram = instr.getVram(); + InstrId instr_id = instr.getUniqueId(); auto print_indent = [&]() { fmt::print(output_file, " "); @@ -133,16 +134,19 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con // Output a comment with the original instruction print_indent(); - if (instr.isBranch() || instr.getUniqueId() == InstrId::cpu_j) { + if (instr.isBranch() || instr_id == InstrId::cpu_j) { generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())))); - } else if (instr.getUniqueId() == InstrId::cpu_jal) { + } else if (instr_id == InstrId::cpu_jal) { generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric())))); } else { generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0))); } - if (skipped_insns.contains(instr_vram)) { - return true; + // Replace loads for jump table entries into addiu. This leaves the jump table entry's address in the output register + // instead of the entry's value, which can then be used to determine the offset from the start of the jump table. 
+ if (jtbl_lw_instructions.contains(instr_vram)) { + assert(instr_id == InstrId::cpu_lw); + instr_id = InstrId::cpu_addiu; } N64Recomp::RelocType reloc_type = N64Recomp::RelocType::R_MIPS_NONE; @@ -216,7 +220,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { next_reloc_index++; } - if (!process_instruction(generator, context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, use_indent, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) { + if (!process_instruction(generator, context, func, stats, jtbl_lw_instructions, instr_index + 1, instructions, output_file, use_indent, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) { return false; } } @@ -274,7 +278,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con return false; } print_indent(); - generator.emit_trigger_event(reloc_reference_symbol); + generator.emit_trigger_event((uint32_t)reloc_reference_symbol); print_link_branch(); } // Normal symbol or reference symbol, @@ -398,7 +402,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con bool handled = true; - switch (instr.getUniqueId()) { + switch (instr_id) { case InstrId::cpu_nop: fmt::print(output_file, "\n"); break; @@ -444,7 +448,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con if (find_result != stats.jump_tables.end()) { const N64Recomp::JumpTable& cur_jtbl = *find_result; print_indent(); - generator.emit_variable_declaration(fmt::format("jr_addend_{:08X}", cur_jtbl.jr_vram), cur_jtbl.addend_reg); + generator.emit_jtbl_addend_declaration(cur_jtbl, cur_jtbl.addend_reg); } } break; @@ -457,7 +461,7 @@ bool process_instruction(GeneratorType& 
generator, const N64Recomp::Context& con case InstrId::cpu_divu: case InstrId::cpu_ddivu: print_indent(); - generator.emit_muldiv(instr.getUniqueId(), rs, rt); + generator.emit_muldiv(instr_id, rs, rt); break; // Branches case InstrId::cpu_jal: @@ -527,7 +531,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con return false; } print_indent(); - generator.emit_switch(fmt::format("jr_addend_{:08X}", cur_jtbl.jr_vram), 2); + generator.emit_switch(cur_jtbl, rs); for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) { print_indent(); print_indent(); @@ -652,7 +656,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con } }; - auto find_binary_it = binary_ops.find(instr.getUniqueId()); + auto find_binary_it = binary_ops.find(instr_id); if (find_binary_it != binary_ops.end()) { print_indent(); const BinaryOp& op = find_binary_it->second; @@ -674,7 +678,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con handled = true; } - auto find_unary_it = unary_ops.find(instr.getUniqueId()); + auto find_unary_it = unary_ops.find(instr_id); if (find_unary_it != unary_ops.end()) { print_indent(); const UnaryOp& op = find_unary_it->second; @@ -694,7 +698,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con handled = true; } - auto find_conditional_branch_it = conditional_branch_ops.find(instr.getUniqueId()); + auto find_conditional_branch_it = conditional_branch_ops.find(instr_id); if (find_conditional_branch_it != conditional_branch_ops.end()) { print_indent(); // TODO combining the branch condition and branch target into one generator call would allow better optimization in the runtime's JIT generator. 
@@ -720,7 +724,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con handled = true; } - auto find_store_it = store_ops.find(instr.getUniqueId()); + auto find_store_it = store_ops.find(instr_id); if (find_store_it != store_ops.end()) { print_indent(); const StoreOp& op = find_store_it->second; @@ -794,11 +798,11 @@ bool recompile_function_impl(GeneratorType& generator, const N64Recomp::Context& return false; } - std::unordered_set skipped_insns{}; + std::unordered_set jtbl_lw_instructions{}; // Add jump table labels into function for (const auto& jtbl : stats.jump_tables) { - skipped_insns.insert(jtbl.lw_vram); + jtbl_lw_instructions.insert(jtbl.lw_vram); for (uint32_t jtbl_entry : jtbl.entries) { branch_labels.insert(jtbl_entry); } @@ -832,7 +836,7 @@ bool recompile_function_impl(GeneratorType& generator, const N64Recomp::Context& } // Process the current instruction and check for errors - if (process_instruction(generator, context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) { + if (process_instruction(generator, context, func, stats, jtbl_lw_instructions, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) { fmt::print(stderr, "Error in recompiling {}, clearing output file\n", func.name); output_file.clear(); return false;