From 46ab6f758d64272770ffa94ffd478606b086b099 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Sat, 28 Sep 2024 23:58:25 -0400 Subject: [PATCH] Moved remaining instruction processing text generation into Generator class --- include/generator.h | 46 +++++++ src/analysis.cpp | 13 +- src/analysis.h | 1 - src/cgenerator.cpp | 128 +++++++++++++++++- src/recompilation.cpp | 295 ++++++++++++++++++++++-------------------- 5 files changed, 332 insertions(+), 151 deletions(-) diff --git a/include/generator.h b/include/generator.h index 5afcc57..cd41b25 100644 --- a/include/generator.h +++ b/include/generator.h @@ -30,10 +30,33 @@ namespace N64Recomp { virtual void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const = 0; virtual void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const = 0; virtual void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const = 0; + virtual void emit_function_start(std::ostream& output_file, const std::string& function_name) const = 0; + virtual void emit_function_end(std::ostream& output_file) const = 0; + virtual void emit_function_call_lookup(std::ostream& output_file, uint32_t addr) const = 0; + virtual void emit_function_call_by_register(std::ostream& output_file, int reg) const = 0; + virtual void emit_function_call_by_name(std::ostream& output_file, const std::string& func_name) const = 0; + virtual void emit_goto(std::ostream& output_file, const std::string& target) const = 0; + virtual void emit_label(std::ostream& output_file, const std::string& label_name) const = 0; + virtual void emit_variable_declaration(std::ostream& output_file, const std::string& var_name, int reg) const = 0; virtual void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0; virtual void emit_branch_close(std::ostream& output_file) const = 0; + virtual 
void emit_switch(std::ostream& output_file, const std::string& jump_variable, int shift_amount) const = 0; + virtual void emit_case(std::ostream& output_file, int case_index, const std::string& target_label) const = 0; + virtual void emit_switch_error(std::ostream& output_file, uint32_t instr_vram, uint32_t jtbl_vram) const = 0; + virtual void emit_switch_close(std::ostream& output_file) const = 0; + virtual void emit_return(std::ostream& output_file) const = 0; virtual void emit_check_fr(std::ostream& output_file, int fpr) const = 0; virtual void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const = 0; + virtual void emit_cop0_status_read(std::ostream& output_file, int reg) const = 0; + virtual void emit_cop0_status_write(std::ostream& output_file, int reg) const = 0; + virtual void emit_cop1_cs_read(std::ostream& output_file, int reg) const = 0; + virtual void emit_cop1_cs_write(std::ostream& output_file, int reg) const = 0; + virtual void emit_muldiv(std::ostream& output_file, InstrId instr_id, int reg1, int reg2) const = 0; + virtual void emit_syscall(std::ostream& output_file, uint32_t instr_vram) const = 0; + virtual void emit_do_break(std::ostream& output_file, uint32_t instr_vram) const = 0; + virtual void emit_pause_self(std::ostream& output_file) const = 0; + virtual void emit_trigger_event(std::ostream& output_file, size_t event_index) const = 0; + virtual void emit_comment(std::ostream& output_file, const std::string& comment) const = 0; }; class CGenerator final : Generator { @@ -42,10 +65,33 @@ namespace N64Recomp { void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const final; void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const final; void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const final; + void emit_function_start(std::ostream& output_file, const std::string& 
function_name) const final; + void emit_function_end(std::ostream& output_file) const final; + void emit_function_call_lookup(std::ostream& output_file, uint32_t addr) const final; + void emit_function_call_by_register(std::ostream& output_file, int reg) const final; + void emit_function_call_by_name(std::ostream& output_file, const std::string& func_name) const final; + void emit_goto(std::ostream& output_file, const std::string& target) const final; + void emit_label(std::ostream& output_file, const std::string& label_name) const final; + void emit_variable_declaration(std::ostream& output_file, const std::string& var_name, int reg) const final; void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const final; void emit_branch_close(std::ostream& output_file) const final; + void emit_switch(std::ostream& output_file, const std::string& jump_variable, int shift_amount) const final; + void emit_case(std::ostream& output_file, int case_index, const std::string& target_label) const final; + void emit_switch_error(std::ostream& output_file, uint32_t instr_vram, uint32_t jtbl_vram) const final; + void emit_switch_close(std::ostream& output_file) const final; + void emit_return(std::ostream& output_file) const final; void emit_check_fr(std::ostream& output_file, int fpr) const final; void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const final; + void emit_cop0_status_read(std::ostream& output_file, int reg) const final; + void emit_cop0_status_write(std::ostream& output_file, int reg) const final; + void emit_cop1_cs_read(std::ostream& output_file, int reg) const final; + void emit_cop1_cs_write(std::ostream& output_file, int reg) const final; + void emit_muldiv(std::ostream& output_file, InstrId instr_id, int reg1, int reg2) const final; + void emit_syscall(std::ostream& output_file, uint32_t instr_vram) const final; + void emit_do_break(std::ostream& output_file, uint32_t 
instr_vram) const final; + void emit_pause_self(std::ostream& output_file) const final; + void emit_trigger_event(std::ostream& output_file, size_t event_index) const final; + void emit_comment(std::ostream& output_file, const std::string& comment) const final; private: void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const; void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const; diff --git a/src/analysis.cpp b/src/analysis.cpp index 5dfd955..db8c7a2 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ -196,19 +196,8 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const N64Recom instr.getVram(), std::vector{} ); - } else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) { - uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui; - stats.absolute_jumps.emplace_back( - address, - instr.getVram() - ); - } - // Allow tail calls (TODO account for trailing nops due to bad function splits) - else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) { - // Inconclusive analysis - fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name); - return false; } + // TODO stricter validation on tail calls, since not all indirect jumps can be treated as one. 
break; default: if (instr.modifiesRd()) { diff --git a/src/analysis.h b/src/analysis.h index eafd1e7..19eb55a 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -29,7 +29,6 @@ namespace N64Recomp { struct FunctionStats { std::vector jump_tables; - std::vector absolute_jumps; }; bool analyze_function(const Context& context, const Function& function, const std::vector& instructions, FunctionStats& stats); diff --git a/src/cgenerator.cpp b/src/cgenerator.cpp index 7751568..f8607e6 100644 --- a/src/cgenerator.cpp +++ b/src/cgenerator.cpp @@ -365,6 +365,46 @@ void N64Recomp::CGenerator::get_binary_expr_string(BinaryOpType type, const Bina } } +void N64Recomp::CGenerator::emit_function_start(std::ostream& output_file, const std::string& function_name) const { + fmt::print(output_file, + "RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n" + // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output + " uint64_t hi = 0, lo = 0, result = 0;\n" + " unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n" + " int c1cs = 0;\n", // cop1 conditional signal + function_name); +} + +void N64Recomp::CGenerator::emit_function_end(std::ostream& output_file) const { + fmt::print(output_file, ";}}\n"); +} + +void N64Recomp::CGenerator::emit_function_call_lookup(std::ostream& output_file, uint32_t addr) const { + fmt::print(output_file, "LOOKUP_FUNC(0x{:08X})(rdram, ctx);\n", addr); +} + +void N64Recomp::CGenerator::emit_function_call_by_register(std::ostream& output_file, int reg) const { + fmt::print(output_file, "LOOKUP_FUNC({})(rdram, ctx);\n", gpr_to_string(reg)); +} + +void N64Recomp::CGenerator::emit_function_call_by_name(std::ostream& output_file, const std::string& func_name) const { + fmt::print(output_file, "{}(rdram, ctx);\n", func_name); +} + +void N64Recomp::CGenerator::emit_goto(std::ostream& output_file, const std::string& target) const { + fmt::print(output_file, + " goto {};\n", target); +} + 
+void N64Recomp::CGenerator::emit_label(std::ostream& output_file, const std::string& label_name) const { + fmt::print(output_file, + "{}:\n", label_name); +} + +void N64Recomp::CGenerator::emit_variable_declaration(std::ostream& output_file, const std::string& var_name, int reg) const { + fmt::print(output_file, "gpr {} = {};\n", var_name, gpr_to_string(reg)); +} + void N64Recomp::CGenerator::emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const { // Thread local variables to prevent allocations when possible. // TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations. @@ -374,7 +414,27 @@ void N64Recomp::CGenerator::emit_branch_condition(std::ostream& output_file, con } void N64Recomp::CGenerator::emit_branch_close(std::ostream& output_file) const { - fmt::print(output_file, " }}\n"); + fmt::print(output_file, "}}\n"); +} + +void N64Recomp::CGenerator::emit_switch_close(std::ostream& output_file) const { + fmt::print(output_file, "}}\n"); +} + +void N64Recomp::CGenerator::emit_switch(std::ostream& output_file, const std::string& jump_variable, int shift_amount) const { + fmt::print(output_file, "switch ({} >> {}) {{\n", jump_variable, shift_amount); +} + +void N64Recomp::CGenerator::emit_case(std::ostream& output_file, int case_index, const std::string& target_label) const { + fmt::print(output_file, "case {}: goto {}; break;\n", case_index, target_label); +} + +void N64Recomp::CGenerator::emit_switch_error(std::ostream& output_file, uint32_t instr_vram, uint32_t jtbl_vram) const { + fmt::print(output_file, "default: switch_error(__func__, 0x{:08X}, 0x{:08X});\n", instr_vram, jtbl_vram); +} + +void N64Recomp::CGenerator::emit_return(std::ostream& output_file) const { + fmt::print(output_file, "return;\n"); } void N64Recomp::CGenerator::emit_check_fr(std::ostream& output_file, int fpr) const { @@ -385,6 +445,72 @@ void 
N64Recomp::CGenerator::emit_check_nan(std::ostream& output_file, int fpr, b fmt::print(output_file, "NAN_CHECK(ctx->f{}.{}); ", fpr, is_double ? "d" : "fl"); } +void N64Recomp::CGenerator::emit_cop0_status_read(std::ostream& output_file, int reg) const { + fmt::print(output_file, "{} = cop0_status_read(ctx);\n", gpr_to_string(reg)); +} + +void N64Recomp::CGenerator::emit_cop0_status_write(std::ostream& output_file, int reg) const { + fmt::print(output_file, "cop0_status_write(ctx, {})", gpr_to_string(reg)); +} + +void N64Recomp::CGenerator::emit_cop1_cs_read(std::ostream& output_file, int reg) const { + fmt::print(output_file, "{} = rounding_mode;\n", gpr_to_string(reg)); +} + +void N64Recomp::CGenerator::emit_cop1_cs_write(std::ostream& output_file, int reg) const { + fmt::print(output_file, "rounding_mode = ({}) & 0x3;\n", gpr_to_string(reg)); +} + +void N64Recomp::CGenerator::emit_muldiv(std::ostream& output_file, InstrId instr_id, int reg1, int reg2) const { + switch (instr_id) { + case InstrId::cpu_mult: + fmt::print(output_file, "result = S64(S32({})) * S64(S32({})); lo = S32(result >> 0); hi = S32(result >> 32);\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_dmult: + fmt::print(output_file, "DMULT(S64({}), S64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_multu: + fmt::print(output_file, "result = U64(U32({})) * U64(U32({})); lo = S32(result >> 0); hi = S32(result >> 32);\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_dmultu: + fmt::print(output_file, "DMULTU(U64({}), U64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_div: + // Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1 + fmt::print(output_file, "lo = S32(S64(S32({0})) / S64(S32({1}))); hi = S32(S64(S32({0})) % S64(S32({1})));\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_ddiv: + 
fmt::print(output_file, "DDIV(S64({}), S64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_divu: + fmt::print(output_file, "lo = S32(U32({0}) / U32({1})); hi = S32(U32({0}) % U32({1}));\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + case InstrId::cpu_ddivu: + fmt::print(output_file, "DDIVU(U64({}), U64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2)); + break; + } +} + +void N64Recomp::CGenerator::emit_syscall(std::ostream& output_file, uint32_t instr_vram) const { + fmt::print(output_file, "recomp_syscall_handler(rdram, ctx, 0x{:08X});\n", instr_vram); +} + +void N64Recomp::CGenerator::emit_do_break(std::ostream& output_file, uint32_t instr_vram) const { + fmt::print(output_file, "do_break({});\n", instr_vram); +} + +void N64Recomp::CGenerator::emit_pause_self(std::ostream& output_file) const { + fmt::print(output_file, "pause_self(rdram);\n"); +} + +void N64Recomp::CGenerator::emit_trigger_event(std::ostream& output_file, size_t event_index) const { + fmt::print(output_file, "recomp_trigger_event(rdram, ctx, base_event_index + {});\n", event_index); +} + +void N64Recomp::CGenerator::emit_comment(std::ostream& output_file, const std::string& comment) const { + fmt::print(output_file, "// {}\n", comment); +} + void N64Recomp::CGenerator::process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const { // Thread local variables to prevent allocations when possible. // TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations. diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 1faef25..dcee470 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -109,8 +109,8 @@ std::string_view ctx_gpr_prefix(int reg) { return ""; } -// Major TODO, this function grew very organically and needs to be cleaned up. 
Ideally, it'll get split up into some sort of lookup table grouped by similar instruction types. -bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set& skipped_insns, size_t instr_index, const std::vector& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span> static_funcs_out) { +template +bool process_instruction(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set& skipped_insns, size_t instr_index, const std::vector& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span> static_funcs_out) { using namespace N64Recomp; const auto& section = context.sections[func.section_index]; @@ -132,12 +132,13 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun } // Output a comment with the original instruction + print_indent(); if (instr.isBranch() || instr.getUniqueId() == InstrId::cpu_j) { - fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric()))); + generator.emit_comment(output_file, fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())))); } else if (instr.getUniqueId() == InstrId::cpu_jal) { - fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric()))); + generator.emit_comment(output_file, fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", 
(uint32_t)instr.getBranchVramGeneric())))); } else { - fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0)); + generator.emit_comment(output_file, fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0))); } if (skipped_insns.contains(instr_vram)) { @@ -206,13 +207,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun } } - auto print_line = [&](fmt::format_string fmt_str, Ts ...args) { - print_indent(); - fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...)); - fmt::print(output_file, ";\n"); - }; - - auto print_unconditional_branch = [&](fmt::format_string fmt_str, Ts ...args) { + auto process_delay_slot = [&](bool use_indent) { if (instr_index < instructions.size() - 1) { bool dummy_needs_link_branch; bool dummy_is_branch_likely; @@ -221,35 +216,76 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { next_reloc_index++; } - if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) { + if (!process_instruction(generator, context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, use_indent, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) { return false; } } - print_indent(); - fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...)); - if (needs_link_branch) { - fmt::print(output_file, ";\n goto after_{};\n", link_branch_index); - } else { - fmt::print(output_file, ";\n"); - } return true; }; - auto print_func_call = [reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, §ion, &func, &static_funcs_out, &needs_link_branch, 
&print_unconditional_branch] - (uint32_t target_func_vram, bool link_branch = true, bool indent = false) + auto print_link_branch = [&]() { + if (needs_link_branch) { + print_indent(); + generator.emit_goto(output_file, fmt::format("after_{}", link_branch_index)); + } + }; + + auto print_return_with_delay_slot = [&]() { + if (!process_delay_slot(false)) { + return false; + } + print_indent(); + generator.emit_return(output_file); + print_link_branch(); + return true; + }; + + auto print_goto_with_delay_slot = [&](const std::string& target) { + if (!process_delay_slot(false)) { + return false; + } + print_indent(); + generator.emit_goto(output_file, target); + print_link_branch(); + return true; + }; + + auto print_func_call_by_register = [&](int reg) { + if (!process_delay_slot(false)) { + return false; + } + print_indent(); + generator.emit_function_call_by_register(output_file, reg); + print_link_branch(); + return true; + }; + + auto print_func_call_lookup = [&](uint32_t target_vram) { + if (!process_delay_slot(false)) { + return false; + } + print_indent(); + generator.emit_function_call_lookup(output_file, target_vram); + print_link_branch(); + return true; + }; + + auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, §ion, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &output_file, &print_link_branch] + (uint32_t target_func_vram, bool tail_call = false, bool indent = false) { + bool call_by_lookup = false; // Event symbol, emit a call to the runtime to trigger this event. 
if (reloc_section == N64Recomp::SectionEvent) { - needs_link_branch = link_branch; + needs_link_branch = !tail_call; if (indent) { - if (!print_unconditional_branch(" recomp_trigger_event(rdram, ctx, base_event_index + {})", reloc_reference_symbol)) { - return false; - } - } else { - if (!print_unconditional_branch("recomp_trigger_event(rdram, ctx, base_event_index + {})", reloc_reference_symbol)) { - return false; - } + print_indent(); } + if (!process_delay_slot(false)) { + return false; + } + print_indent(); + generator.emit_trigger_event(output_file, reloc_reference_symbol); + print_link_branch(); } // Normal symbol or reference symbol, else { @@ -290,59 +326,63 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun // Relocation isn't necessary for jumps inside a relocatable section, as this code path will never run if the target vram // is in the current function's section (see the branch for `in_current_section` above). // If a game ever needs to jump between multiple relocatable sections, relocation will be necessary here. - jal_target_name = fmt::format("LOOKUP_FUNC(0x{:08X})", target_func_vram); + call_by_lookup = true; break; case JalResolutionResult::Error: fmt::print(stderr, "Internal error when resolving jal to address 0x{:08X} in function {}. 
Please report this issue.\n", target_func_vram, func.name); return false; } } - needs_link_branch = link_branch; + needs_link_branch = !tail_call; if (indent) { - if (!print_unconditional_branch(" {}(rdram, ctx)", jal_target_name)) { - return false; - } - } else { - if (!print_unconditional_branch("{}(rdram, ctx)", jal_target_name)) { - return false; - } + print_indent(); } + if (!process_delay_slot(false)) { + return false; + } + print_indent(); + if (call_by_lookup) { + generator.emit_function_call_lookup(output_file, target_func_vram); + } + else { + generator.emit_function_call_by_name(output_file, jal_target_name); + } + print_link_branch(); } return true; }; auto print_branch = [&](uint32_t branch_target) { + // If the branch target is outside the current function, check if it can be treated as a tail call. if (branch_target < func.vram || branch_target >= func_vram_end) { + // If the branch target is the start of some known function, this can be handled as a tail call. // FIXME: how to deal with static functions? 
if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); - if (!print_func_call(branch_target, false, true)) { + if (!print_func_call_by_address(branch_target, true, true)) { return false; } - print_line(" return"); - fmt::print(output_file, " }}\n"); + print_indent(); + generator.emit_return(output_file); + print_indent(); + generator.emit_branch_close(output_file); return true; } fmt::print(stderr, "[Warn] Function {} is branching outside of the function (to 0x{:08X})\n", func.name, branch_target); } - if (instr_index < instructions.size() - 1) { - bool dummy_needs_link_branch; - bool dummy_is_branch_likely; - size_t next_reloc_index = reloc_index; - uint32_t next_vram = instr_vram + 4; - if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { - next_reloc_index++; - } - if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) { - return false; - } + if (!process_delay_slot(true)) { + return false; } - fmt::print(output_file, " goto L_{:08X};\n", branch_target); + print_indent(); + print_indent(); + generator.emit_goto(output_file, fmt::format("L_{:08X}", branch_target)); if (needs_link_branch) { - fmt::print(output_file, " goto after_{};\n", link_branch_index); + print_indent(); + print_indent(); + generator.emit_goto(output_file, fmt::format("after_{}", link_branch_index)); } return true; }; @@ -375,7 +415,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun Cop0Reg reg = instr.Get_cop0d(); switch (reg) { case Cop0Reg::COP0_Status: - print_line("{}{} = cop0_status_read(ctx)", ctx_gpr_prefix(rt), rt); + print_indent(); + generator.emit_cop0_status_read(output_file, rt); break; default: 
fmt::print(stderr, "Unhandled cop0 register in mfc0: {}\n", (int)reg); @@ -388,7 +429,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun Cop0Reg reg = instr.Get_cop0d(); switch (reg) { case Cop0Reg::COP0_Status: - print_line("cop0_status_write(ctx, {}{})", ctx_gpr_prefix(rt), rt); + print_indent(); + generator.emit_cop0_status_write(output_file, rt); break; default: fmt::print(stderr, "Unhandled cop0 register in mtc0: {}\n", (int)reg); @@ -408,38 +450,25 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun // If so, create a temp to preserve the addend register's value if (find_result != stats.jump_tables.end()) { const N64Recomp::JumpTable& cur_jtbl = *find_result; - print_line("gpr jr_addend_{:08X} = {}{}", cur_jtbl.jr_vram, ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg); + print_indent(); + generator.emit_variable_declaration(output_file, fmt::format("jr_addend_{:08X}", cur_jtbl.jr_vram), cur_jtbl.addend_reg); } } break; case InstrId::cpu_mult: - print_line("result = S64(S32({}{})) * S64(S32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case InstrId::cpu_dmult: - print_line("DMULT(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case InstrId::cpu_multu: - print_line("result = U64(U32({}{})) * U64(U32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case InstrId::cpu_dmultu: - print_line("DMULTU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case InstrId::cpu_div: - // Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1 - print_line("lo = S32(S64(S32({}{})) / S64(S32({}{}))); hi = S32(S64(S32({}{})) % S64(S32({}{})))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case 
InstrId::cpu_ddiv: - print_line("DDIV(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case InstrId::cpu_divu: - print_line("lo = S32(U32({}{}) / U32({}{})); hi = S32(U32({}{}) % U32({}{}))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - break; case InstrId::cpu_ddivu: - print_line("DDIVU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + print_indent(); + generator.emit_muldiv(output_file, instr.getUniqueId(), rs, rt); break; // Branches case InstrId::cpu_jal: - if (!print_func_call(instr.getBranchVramGeneric())) { + if (!print_func_call_by_address(instr.getBranchVramGeneric())) { return false; } break; @@ -450,18 +479,19 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun return false; } needs_link_branch = true; - print_unconditional_branch("LOOKUP_FUNC({}{})(rdram, ctx)", ctx_gpr_prefix(rs), rs); + print_func_call_by_register(rs); break; case InstrId::cpu_j: case InstrId::cpu_b: { uint32_t branch_target = instr.getBranchVramGeneric(); if (branch_target == instr_vram) { - print_line("pause_self(rdram)"); + print_indent(); + generator.emit_pause_self(output_file); } // Check if the branch is within this function else if (branch_target >= func.vram && branch_target < func_vram_end) { - print_unconditional_branch("goto L_{:08X}", branch_target); + print_goto_with_delay_slot(fmt::format("L_{:08X}", branch_target)); } // This may be a tail call in the middle of the control flow due to a previous check // For example: @@ -476,11 +506,12 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun // ``` // FIXME: how to deal with static functions? 
else if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { - fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); - if (!print_func_call(branch_target, false)) { + fmt::print("[Info] Tail call in {} to 0x{:08X}\n", func.name, branch_target); + if (!print_func_call_by_address(branch_target, true)) { return false; } - print_line("return"); + print_indent(); + generator.emit_return(output_file); } else { fmt::print(stderr, "Unhandled branch in {} at 0x{:08X} to 0x{:08X}\n", func.name, instr_vram, branch_target); @@ -490,7 +521,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun break; case InstrId::cpu_jr: if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { - print_unconditional_branch("return"); + print_return_with_delay_slot(); } else { auto jtbl_find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(), [instr_vram](const N64Recomp::JumpTable& jtbl) { @@ -499,58 +530,41 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun if (jtbl_find_result != stats.jump_tables.end()) { const N64Recomp::JumpTable& cur_jtbl = *jtbl_find_result; - bool dummy_needs_link_branch, dummy_is_branch_likely; - size_t next_reloc_index = reloc_index; - uint32_t next_vram = instr_vram + 4; - if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { - next_reloc_index++; - } - if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) { + if (!process_delay_slot(false)) { return false; } print_indent(); - fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram); + generator.emit_switch(output_file, fmt::format("jr_addend_{:08X}", cur_jtbl.jr_vram), 2); for (size_t entry_index = 0; entry_index < 
cur_jtbl.entries.size(); entry_index++) { print_indent(); - print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]); + print_indent(); + generator.emit_case(output_file, entry_index, fmt::format("L_{:08X}", cur_jtbl.entries[entry_index])); } print_indent(); - print_line("default: switch_error(__func__, 0x{:08X}, 0x{:08X})", instr_vram, cur_jtbl.vram); print_indent(); - fmt::print(output_file, "}}\n"); + generator.emit_switch_error(output_file, instr_vram, cur_jtbl.vram); + print_indent(); + generator.emit_switch_close(output_file); break; } - auto jump_find_result = std::find_if(stats.absolute_jumps.begin(), stats.absolute_jumps.end(), - [instr_vram](const N64Recomp::AbsoluteJump& jump) { - return jump.instruction_vram == instr_vram; - }); - - if (jump_find_result != stats.absolute_jumps.end()) { - print_unconditional_branch("LOOKUP_FUNC({})(rdram, ctx)", (uint64_t)(int32_t)jump_find_result->jump_target); - // jr doesn't link so it acts like a tail call, meaning we should return directly after the jump returns - print_line("return"); - break; - } - - bool is_tail_call = instr_vram == func_vram_end - 2 * sizeof(func.words[0]); - if (is_tail_call) { - fmt::print("Indirect tail call in {}\n", func.name); - print_unconditional_branch("LOOKUP_FUNC({}{})(rdram, ctx)", ctx_gpr_prefix(rs), rs); - print_line("return"); - break; - } - - fmt::print(stderr, "No jump table found for jr at 0x{:08X} and not tail call\n", instr_vram); + fmt::print("[Info] Indirect tail call in {}\n", func.name); + print_func_call_by_register(rs); + print_indent(); + generator.emit_return(output_file); + break; } break; case InstrId::cpu_syscall: - print_line("recomp_syscall_handler(rdram, ctx, 0x{:08X})", instr_vram); + print_indent(); + generator.emit_syscall(output_file, instr_vram); // syscalls don't link, so treat it like a tail call - print_line("return"); + print_indent(); + generator.emit_return(output_file); break; case InstrId::cpu_break: - 
print_line("do_break({})", instr_vram); + print_indent(); + generator.emit_do_break(output_file, instr_vram); break; // Cop1 rounding mode @@ -559,21 +573,22 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun fmt::print(stderr, "Invalid FP control register for ctc1: {}\n", cop1_cs); return false; } - print_line("rounding_mode = ({}{}) & 0x3", ctx_gpr_prefix(rt), rt); + print_indent(); + generator.emit_cop1_cs_write(output_file, rt); break; case InstrId::cpu_cfc1: if (cop1_cs != 31) { fmt::print(stderr, "Invalid FP control register for cfc1: {}\n", cop1_cs); return false; } - print_line("{}{} = rounding_mode", ctx_gpr_prefix(rt), rt); + print_indent(); + generator.emit_cop1_cs_read(output_file, rt); break; default: handled = false; break; } - CGenerator generator{}; InstructionContext instruction_context{}; instruction_context.rd = rd; instruction_context.rs = rs; @@ -658,7 +673,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun if (op.check_nan) { do_check_nan(output_file, generator, instruction_context, op.operands.operands[0]); do_check_nan(output_file, generator, instruction_context, op.operands.operands[1]); - fmt::print(output_file, "\n "); + fmt::print(output_file, "\n"); + print_indent(); } generator.process_binary_op(output_file, op, instruction_context); @@ -677,7 +693,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun if (op.check_nan) { do_check_nan(output_file, generator, instruction_context, op.input); - fmt::print(output_file, "\n "); + fmt::print(output_file, "\n"); + print_indent(); } generator.process_unary_op(output_file, op, instruction_context); @@ -691,7 +708,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun print_indent(); if (find_conditional_branch_it->second.link) { - if (!print_func_call(instr.getBranchVramGeneric())) { + if (!print_func_call_by_address(instr.getBranchVramGeneric())) { return false; } } 
@@ -701,6 +718,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun } } + print_indent(); generator.emit_branch_close(output_file); is_branch_likely = find_conditional_branch_it->second.likely; @@ -727,23 +745,19 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun // TODO is this used? if (emit_link_branch) { - fmt::print(output_file, " after_{}:\n", link_branch_index); + print_indent(); + generator.emit_label(output_file, fmt::format("after_{}", link_branch_index)); } return true; } -bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span> static_funcs_out, bool tag_reference_relocs) { +template +bool recompile_function_impl(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span> static_funcs_out, bool tag_reference_relocs) { //fmt::print("Recompiling {}\n", func.name); std::vector instructions; - fmt::print(output_file, - "RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n" - // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output - " uint64_t hi = 0, lo = 0, result = 0;\n" - " unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n" - " int c1cs = 0;\n", // cop1 conditional signal - func.name); + generator.emit_function_start(output_file, func.name); if (context.trace_mode) { fmt::print(output_file, @@ -808,11 +822,11 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R bool is_branch_likely = false; // If we're in the delay slot of a likely instruction, emit a goto to skip the instruction before any labels if (in_likely_delay_slot) { - fmt::print(output_file, " goto skip_{};\n", num_likely_branches); + generator.emit_goto(output_file, fmt::format("skip_{}", num_likely_branches)); } // If there are any other branch labels to insert 
and we're at the next one, insert it if (cur_label != branch_labels.end() && vram >= *cur_label) { - fmt::print(output_file, "L_{:08X}:\n", *cur_label); + generator.emit_label(output_file, fmt::format("L_{:08X}", *cur_label)); ++cur_label; } @@ -822,7 +836,7 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R } // Process the current instruction and check for errors - if (process_instruction(context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) { + if (process_instruction(generator, context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) { fmt::print(stderr, "Error in recompiling {}, clearing output file\n", func.name); output_file.clear(); return false; @@ -833,7 +847,8 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R } // Now that the instruction has been processed, emit a skip label for the likely branch if needed if (in_likely_delay_slot) { - fmt::print(output_file, " skip_{}:\n", num_likely_branches); + fmt::print(output_file, " "); + generator.emit_label(output_file, fmt::format("skip_{}", num_likely_branches)); num_likely_branches++; } // Mark the next instruction as being in a likely delay slot if the @@ -844,7 +859,13 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R } // Terminate the function - fmt::print(output_file, ";}}\n"); + generator.emit_function_end(output_file); return true; } + +// Wrap the templated function with CGenerator as the template parameter. 
+bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span> static_funcs_out, bool tag_reference_relocs) { // Non-template entry point: runs recompile_function_impl with a CGenerator and returns its bool result.
+    CGenerator generator{}; // Value-initialized generator; handed to the implementation as its first argument.
+    return recompile_function_impl(generator, context, func, output_file, static_funcs_out, tag_reference_relocs); // All five parameters are forwarded unchanged, in the same order.
+}