Implement live recompiler (#114)

This commit implements the "live recompiler", another backend for the recompiler that generates platform-specific assembly at runtime. This is still static recompilation as opposed to dynamic recompilation, as it requires information about the binary ahead of time and leverages the same static analysis that the C recompiler uses. However, like dynamic recompilation, it's aimed at recompiling binaries at runtime, mainly for modding purposes.
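
The diff below refactors the C recompiler's output path into a set of emit_* hooks so that multiple backends can share the same instruction-processing core. As a rough illustration of that shape (the method names are taken from the CGenerator functions in this diff, but this stub itself is hypothetical):

```cpp
// Illustrative stub only: process_instruction() is templated on the
// generator type (see the diff below), so any type exposing the expected
// emit_* methods can act as a backend. The C generator writes C source
// text; a live generator would emit sljit instructions instead.
#include <cstdint>
#include <string>

struct NullGenerator {
    void emit_label(const std::string& label_name) const {}
    void emit_goto(const std::string& target) const {}
    void emit_return() const {}
    void emit_function_call_lookup(uint32_t addr) const {}
    void emit_comment(const std::string& comment) const {}
    // ...the remaining emit_* hooks are elided here...
};
```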

The live recompiler leverages a library called sljit to generate platform-specific code. This library provides an API that's implemented on several platforms, including the main targets of this component: x86_64 and ARM64.
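
For a sense of what generating code through sljit looks like, here's a minimal standalone sketch (not code from this PR; sljit's exact function signatures differ between releases, so treat the calls below as approximate):

```cpp
// Minimal sljit sketch: JIT-compile a function computing a + b, then call it.
// NOTE: illustrative only; argument counts for sljit_emit_enter and friends
// vary across sljit versions -- consult the bundled sljitLir.h.
#include <cstdio>
extern "C" {
#include "sljitLir.h"
}

typedef sljit_sw (SLJIT_FUNC *add_fn)(sljit_sw a, sljit_sw b);

int main() {
    struct sljit_compiler* compiler = sljit_create_compiler(nullptr);

    // Prologue: two word-sized arguments, held in saved registers S0/S1.
    sljit_emit_enter(compiler, 0, SLJIT_ARGS2(W, W, W), 1, 2, 0);
    // R0 = S0 + S1, then return R0.
    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_S1, 0);
    sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0);

    add_fn add = (add_fn)sljit_generate_code(compiler);
    sljit_free_compiler(compiler);
    std::printf("%ld\n", (long)add(2, 3)); // prints 5
    return 0;
}
```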

Performance is expected to be lower than the C recompiler's, but should still be plenty fast for running large amounts of recompiled code without issue. Considering these ROMs can often run at full speed in an interpreter, performance should not be a concern for running native code, even if it's less optimal than the C recompiler's codegen.

As mentioned earlier, the main use of the live recompiler will be loading mods in the N64Recomp runtime. This means modders don't need to ship platform-specific binaries for their mods, and it allows bugs in recompilation to be fixed down the line without requiring modders to update their binaries.

This PR also includes a utility for testing the live recompiler. It accepts binaries in a custom format which contain the instructions, input data, and target data. Documentation for the test format, as well as most of the tests used to validate the live recompiler, can be found here. The few remaining tests were binaries I hacked together very hastily, so they need to be cleaned up and will probably be uploaded at a later date. The only test in that suite that doesn't currently succeed is the div test, due to unknown behavior when the two operands aren't properly sign extended to 64 bits. This has no bearing on practical usage, since the inputs will always be sign extended as expected.
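
As an aside, this sign-extension subtlety is related to why the generated div code below widens to 64 bits before dividing. A standalone illustration (not project code) of the C-level hazard that cast avoids:

```cpp
// INT32_MIN / -1 overflows 32-bit signed division, which is undefined
// behavior in C and traps (SIGFPE) on x86, while MIPS DIV simply produces
// a result. Widening both operands to 64 bits first avoids the trap.
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    int32_t rs = INT32_MIN, rt = -1;
    // int32_t bad = rs / rt;                 // UB: traps on x86
    int64_t quot = int64_t(rs) / int64_t(rt); // well-defined: 2147483648
    int64_t rem  = int64_t(rs) % int64_t(rt); // 0
    // The generated code then truncates back to 32 bits for lo/hi.
    std::printf("lo=0x%08" PRIX32 " hi=0x%08" PRIX32 "\n",
                (uint32_t)quot, (uint32_t)rem);
    return 0;
}
```
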
Wiseguy 2024-12-31 16:11:40 -05:00, committed by GitHub
parent 0d0e93e979
commit 66062a06e9
24 changed files with 3452 additions and 385 deletions

@@ -4,7 +4,7 @@
#include "rabbitizer.hpp"
#include "fmt/format.h"
#include "n64recomp.h"
#include "recompiler/context.h"
#include "analysis.h"
extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue);
@@ -194,21 +194,11 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const N64Recom
reg_states[rs].loaded_lw_vram,
reg_states[rs].loaded_addu_vram,
instr.getVram(),
0, // section index gets filled in later
std::vector<uint32_t>{}
);
} else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) {
uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui;
stats.absolute_jumps.emplace_back(
address,
instr.getVram()
);
}
// Allow tail calls (TODO account for trailing nops due to bad function splits)
else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) {
// Inconclusive analysis
fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name);
return false;
}
// TODO stricter validation on tail calls, since not all indirect jumps can be treated as one.
break;
default:
if (instr.modifiesRd()) {
@@ -256,6 +246,7 @@ bool N64Recomp::analyze_function(const N64Recomp::Context& context, const N64Rec
// TODO this assumes that the jump table is in the same section as the function itself
cur_jtbl.rom = cur_jtbl.vram + func.rom - func.vram;
cur_jtbl.section_index = func.section_index;
while (vram < end_address) {
// Retrieve the current entry of the jump table

@@ -4,22 +4,9 @@
#include <cstdint>
#include <vector>
#include "n64recomp.h"
#include "recompiler/context.h"
namespace N64Recomp {
struct JumpTable {
uint32_t vram;
uint32_t addend_reg;
uint32_t rom;
uint32_t lw_vram;
uint32_t addu_vram;
uint32_t jr_vram;
std::vector<uint32_t> entries;
JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, std::vector<uint32_t>&& entries)
: vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), entries(std::move(entries)) {}
};
struct AbsoluteJump {
uint32_t jump_target;
uint32_t instruction_vram;
@@ -29,7 +16,6 @@ namespace N64Recomp {
struct FunctionStats {
std::vector<JumpTable> jump_tables;
std::vector<AbsoluteJump> absolute_jumps;
};
bool analyze_function(const Context& context, const Function& function, const std::vector<rabbitizer::InstructionCpu>& instructions, FunctionStats& stats);

@@ -4,11 +4,11 @@
#include "fmt/format.h"
#include "fmt/ostream.h"
#include "generator.h"
#include "recompiler/generator.h"
struct BinaryOpFields { std::string func_string; std::string infix_string; };
std::vector<BinaryOpFields> c_op_fields = []() {
static std::vector<BinaryOpFields> c_op_fields = []() {
std::vector<BinaryOpFields> ret{};
ret.resize(static_cast<size_t>(N64Recomp::BinaryOpType::COUNT));
std::vector<char> ops_setup{};
@@ -45,9 +45,15 @@ std::vector<BinaryOpFields> c_op_fields = []() {
setup_op(N64Recomp::BinaryOpType::Sra32, "S32", ">>"); // Arithmetic aspect will be taken care of by unary op for first operand.
setup_op(N64Recomp::BinaryOpType::Sra64, "", ">>"); // Arithmetic aspect will be taken care of by unary op for first operand.
setup_op(N64Recomp::BinaryOpType::Equal, "", "==");
setup_op(N64Recomp::BinaryOpType::EqualFloat,"", "==");
setup_op(N64Recomp::BinaryOpType::EqualDouble,"", "==");
setup_op(N64Recomp::BinaryOpType::NotEqual, "", "!=");
setup_op(N64Recomp::BinaryOpType::Less, "", "<");
setup_op(N64Recomp::BinaryOpType::LessFloat, "", "<");
setup_op(N64Recomp::BinaryOpType::LessDouble,"", "<");
setup_op(N64Recomp::BinaryOpType::LessEq, "", "<=");
setup_op(N64Recomp::BinaryOpType::LessEqFloat,"", "<=");
setup_op(N64Recomp::BinaryOpType::LessEqDouble,"", "<=");
setup_op(N64Recomp::BinaryOpType::Greater, "", ">");
setup_op(N64Recomp::BinaryOpType::GreaterEq, "", ">=");
setup_op(N64Recomp::BinaryOpType::LD, "LD", "");
@@ -72,22 +78,22 @@ std::vector<BinaryOpFields> c_op_fields = []() {
return ret;
}();
std::string gpr_to_string(int gpr_index) {
static std::string gpr_to_string(int gpr_index) {
if (gpr_index == 0) {
return "0";
}
return fmt::format("ctx->r{}", gpr_index);
}
std::string fpr_to_string(int fpr_index) {
static std::string fpr_to_string(int fpr_index) {
return fmt::format("ctx->f{}.fl", fpr_index);
}
std::string fpr_double_to_string(int fpr_index) {
static std::string fpr_double_to_string(int fpr_index) {
return fmt::format("ctx->f{}.d", fpr_index);
}
std::string fpr_u32l_to_string(int fpr_index) {
static std::string fpr_u32l_to_string(int fpr_index) {
if (fpr_index & 1) {
return fmt::format("ctx->f_odd[({} - 1) * 2]", fpr_index);
}
@@ -96,11 +102,11 @@ std::string fpr_u32l_to_string(int fpr_index) {
}
}
std::string fpr_u64_to_string(int fpr_index) {
static std::string fpr_u64_to_string(int fpr_index) {
return fmt::format("ctx->f{}.u64", fpr_index);
}
std::string unsigned_reloc(const N64Recomp::InstructionContext& context) {
static std::string unsigned_reloc(const N64Recomp::InstructionContext& context) {
switch (context.reloc_type) {
case N64Recomp::RelocType::R_MIPS_HI16:
return fmt::format("{}RELOC_HI16({}, {:#X})",
@@ -113,7 +119,7 @@ std::string unsigned_reloc(const N64Recomp::InstructionContext& context) {
}
}
std::string signed_reloc(const N64Recomp::InstructionContext& context) {
static std::string signed_reloc(const N64Recomp::InstructionContext& context) {
return "(int16_t)" + unsigned_reloc(context);
}
@@ -223,12 +229,6 @@ void N64Recomp::CGenerator::get_operand_string(Operand operand, UnaryOpType oper
case UnaryOpType::ToU64:
// Nothing to do here, they're already U64
break;
case UnaryOpType::NegateS32:
assert(false);
break;
case UnaryOpType::NegateS64:
assert(false);
break;
case UnaryOpType::Lui:
operand_string = "S32(" + operand_string + " << 16)";
break;
@@ -241,7 +241,10 @@
case UnaryOpType::ToInt32:
operand_string = "(int32_t)" + operand_string;
break;
case UnaryOpType::Negate:
case UnaryOpType::NegateFloat:
operand_string = "-" + operand_string;
break;
case UnaryOpType::NegateDouble:
operand_string = "-" + operand_string;
break;
case UnaryOpType::AbsFloat:
@@ -292,24 +295,48 @@ void N64Recomp::CGenerator::get_operand_string(Operand operand, UnaryOpType oper
case UnaryOpType::TruncateWFromD:
operand_string = "TRUNC_W_D(" + operand_string + ")";
break;
case UnaryOpType::TruncateLFromS:
operand_string = "TRUNC_L_S(" + operand_string + ")";
break;
case UnaryOpType::TruncateLFromD:
operand_string = "TRUNC_L_D(" + operand_string + ")";
break;
case UnaryOpType::RoundWFromS:
operand_string = "lroundf(" + operand_string + ")";
break;
case UnaryOpType::RoundWFromD:
operand_string = "lround(" + operand_string + ")";
break;
case UnaryOpType::RoundLFromS:
operand_string = "llroundf(" + operand_string + ")";
break;
case UnaryOpType::RoundLFromD:
operand_string = "llround(" + operand_string + ")";
break;
case UnaryOpType::CeilWFromS:
operand_string = "S32(ceilf(" + operand_string + "))";
break;
case UnaryOpType::CeilWFromD:
operand_string = "S32(ceil(" + operand_string + "))";
break;
case UnaryOpType::CeilLFromS:
operand_string = "S64(ceilf(" + operand_string + "))";
break;
case UnaryOpType::CeilLFromD:
operand_string = "S64(ceil(" + operand_string + "))";
break;
case UnaryOpType::FloorWFromS:
operand_string = "S32(floorf(" + operand_string + "))";
break;
case UnaryOpType::FloorWFromD:
operand_string = "S32(floor(" + operand_string + "))";
break;
case UnaryOpType::FloorLFromS:
operand_string = "S64(floorf(" + operand_string + "))";
break;
case UnaryOpType::FloorLFromD:
operand_string = "S64(floor(" + operand_string + "))";
break;
}
}
@@ -333,10 +360,10 @@ void N64Recomp::CGenerator::get_binary_expr_string(BinaryOpType type, const Bina
expr_string = fmt::format("{} {} {} ? 1 : 0", input_a, infix_string, input_b);
}
else if (type == BinaryOpType::Equal && operands.operands[1] == Operand::Zero && operands.operand_operations[1] == UnaryOpType::None) {
expr_string = input_a;
expr_string = "!" + input_a;
}
else if (type == BinaryOpType::NotEqual && operands.operands[1] == Operand::Zero && operands.operand_operations[1] == UnaryOpType::None) {
expr_string = "!" + input_a;
expr_string = input_a;
}
// End unnecessary cases.
@@ -365,7 +392,57 @@ void N64Recomp::CGenerator::get_binary_expr_string(BinaryOpType type, const Bina
}
}
void N64Recomp::CGenerator::emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const {
void N64Recomp::CGenerator::emit_function_start(const std::string& function_name, size_t func_index) const {
fmt::print(output_file,
"RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n"
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
" uint64_t hi = 0, lo = 0, result = 0;\n"
" int c1cs = 0;\n", // cop1 conditional signal
function_name);
}
void N64Recomp::CGenerator::emit_function_end() const {
fmt::print(output_file, ";}}\n");
}
void N64Recomp::CGenerator::emit_function_call_lookup(uint32_t addr) const {
fmt::print(output_file, "LOOKUP_FUNC(0x{:08X})(rdram, ctx);\n", addr);
}
void N64Recomp::CGenerator::emit_function_call_by_register(int reg) const {
fmt::print(output_file, "LOOKUP_FUNC({})(rdram, ctx);\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const {
(void)target_section_offset;
const N64Recomp::ReferenceSymbol& sym = context.get_reference_symbol(section_index, symbol_index);
fmt::print(output_file, "{}(rdram, ctx);\n", sym.name);
}
void N64Recomp::CGenerator::emit_function_call(const Context& context, size_t function_index) const {
fmt::print(output_file, "{}(rdram, ctx);\n", context.functions[function_index].name);
}
void N64Recomp::CGenerator::emit_named_function_call(const std::string& function_name) const {
fmt::print(output_file, "{}(rdram, ctx);\n", function_name);
}
void N64Recomp::CGenerator::emit_goto(const std::string& target) const {
fmt::print(output_file,
" goto {};\n", target);
}
void N64Recomp::CGenerator::emit_label(const std::string& label_name) const {
fmt::print(output_file,
"{}:\n", label_name);
}
void N64Recomp::CGenerator::emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const {
std::string jump_variable = fmt::format("jr_addend_{:08X}", jtbl.jr_vram);
fmt::print(output_file, "gpr {} = {};\n", jump_variable, gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const {
// Thread local variables to prevent allocations when possible.
// TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations.
thread_local std::string expr_string{};
@@ -373,19 +450,114 @@ void N64Recomp::CGenerator::emit_branch_condition(std::ostream& output_file, con
fmt::print(output_file, "if ({}) {{\n", expr_string);
}
void N64Recomp::CGenerator::emit_branch_close(std::ostream& output_file) const {
fmt::print(output_file, " }}\n");
void N64Recomp::CGenerator::emit_branch_close() const {
fmt::print(output_file, "}}\n");
}
void N64Recomp::CGenerator::emit_check_fr(std::ostream& output_file, int fpr) const {
void N64Recomp::CGenerator::emit_switch_close() const {
fmt::print(output_file, "}}\n");
}
void N64Recomp::CGenerator::emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const {
(void)recompiler_context;
(void)reg;
// TODO generate code to subtract the jump table address from the register's value instead.
// Once that's done, the addend temp can be deleted to simplify the generator interface.
std::string jump_variable = fmt::format("jr_addend_{:08X}", jtbl.jr_vram);
fmt::print(output_file, "switch ({} >> 2) {{\n", jump_variable);
}
void N64Recomp::CGenerator::emit_case(int case_index, const std::string& target_label) const {
fmt::print(output_file, "case {}: goto {}; break;\n", case_index, target_label);
}
void N64Recomp::CGenerator::emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const {
fmt::print(output_file, "default: switch_error(__func__, 0x{:08X}, 0x{:08X});\n", instr_vram, jtbl_vram);
}
void N64Recomp::CGenerator::emit_return() const {
fmt::print(output_file, "return;\n");
}
void N64Recomp::CGenerator::emit_check_fr(int fpr) const {
fmt::print(output_file, "CHECK_FR(ctx, {});\n ", fpr);
}
void N64Recomp::CGenerator::emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const {
void N64Recomp::CGenerator::emit_check_nan(int fpr, bool is_double) const {
fmt::print(output_file, "NAN_CHECK(ctx->f{}.{}); ", fpr, is_double ? "d" : "fl");
}
void N64Recomp::CGenerator::process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const {
void N64Recomp::CGenerator::emit_cop0_status_read(int reg) const {
fmt::print(output_file, "{} = cop0_status_read(ctx);\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_cop0_status_write(int reg) const {
fmt::print(output_file, "cop0_status_write(ctx, {});", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_cop1_cs_read(int reg) const {
fmt::print(output_file, "{} = get_cop1_cs();\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_cop1_cs_write(int reg) const {
fmt::print(output_file, "set_cop1_cs({});\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) const {
switch (instr_id) {
case InstrId::cpu_mult:
fmt::print(output_file, "result = S64(S32({})) * S64(S32({})); lo = S32(result >> 0); hi = S32(result >> 32);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_dmult:
fmt::print(output_file, "DMULT(S64({}), S64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_multu:
fmt::print(output_file, "result = U64(U32({})) * U64(U32({})); lo = S32(result >> 0); hi = S32(result >> 32);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_dmultu:
fmt::print(output_file, "DMULTU(U64({}), U64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_div:
// Cast to 64-bits before division to prevent arithmetic exception for s32(0x80000000) / -1
fmt::print(output_file, "lo = S32(S64(S32({0})) / S64(S32({1}))); hi = S32(S64(S32({0})) % S64(S32({1})));\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_ddiv:
fmt::print(output_file, "DDIV(S64({}), S64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_divu:
fmt::print(output_file, "lo = S32(U32({0}) / U32({1})); hi = S32(U32({0}) % U32({1}));\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_ddivu:
fmt::print(output_file, "DDIVU(U64({}), U64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
default:
assert(false);
break;
}
}
void N64Recomp::CGenerator::emit_syscall(uint32_t instr_vram) const {
fmt::print(output_file, "recomp_syscall_handler(rdram, ctx, 0x{:08X});\n", instr_vram);
}
void N64Recomp::CGenerator::emit_do_break(uint32_t instr_vram) const {
fmt::print(output_file, "do_break({});\n", instr_vram);
}
void N64Recomp::CGenerator::emit_pause_self() const {
fmt::print(output_file, "pause_self(rdram);\n");
}
void N64Recomp::CGenerator::emit_trigger_event(uint32_t event_index) const {
fmt::print(output_file, "recomp_trigger_event(rdram, ctx, base_event_index + {});\n", event_index);
}
void N64Recomp::CGenerator::emit_comment(const std::string& comment) const {
fmt::print(output_file, "// {}\n", comment);
}
void N64Recomp::CGenerator::process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const {
// Thread local variables to prevent allocations when possible.
// TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations.
thread_local std::string output{};
@@ -395,7 +567,7 @@ void N64Recomp::CGenerator::process_binary_op(std::ostream& output_file, const B
fmt::print(output_file, "{} = {};\n", output, expression);
}
void N64Recomp::CGenerator::process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const {
void N64Recomp::CGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const {
// Thread local variables to prevent allocations when possible.
// TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations.
thread_local std::string output{};
@@ -406,7 +578,7 @@ void N64Recomp::CGenerator::process_unary_op(std::ostream& output_file, const Un
fmt::print(output_file, "{} = {};\n", output, input);
}
void N64Recomp::CGenerator::process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const {
void N64Recomp::CGenerator::process_store_op(const StoreOp& op, const InstructionContext& ctx) const {
// Thread local variables to prevent allocations when possible.
// TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations.
thread_local std::string base_str{};

@@ -3,7 +3,7 @@
#include <toml++/toml.hpp>
#include "fmt/format.h"
#include "config.h"
#include "n64recomp.h"
#include "recompiler/context.h"
std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
if (!child.empty()) {
@@ -375,7 +375,7 @@ N64Recomp::Config::Config(const char* path) {
recomp_include = recomp_include_opt.value();
}
else {
recomp_include = "#include \"librecomp/recomp.h\"";
recomp_include = "#include \"recomp.h\"";
}
std::optional<int32_t> funcs_per_file_opt = input_data["functions_per_output_file"].value<int32_t>();

@@ -3,7 +3,7 @@
#include "fmt/format.h"
// #include "fmt/ostream.h"
#include "n64recomp.h"
#include "recompiler/context.h"
#include "elfio/elfio.hpp"
bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms) {

@@ -9,7 +9,7 @@
#include "fmt/format.h"
#include "fmt/ostream.h"
#include "n64recomp.h"
#include "recompiler/context.h"
#include "config.h"
#include <set>
@@ -111,7 +111,7 @@ bool compare_files(const std::filesystem::path& file1_path, const std::filesyste
return std::equal(begin1, std::istreambuf_iterator<char>(), begin2); //Second argument is end-of-range iterator
}
bool recompile_single_function(const N64Recomp::Context& context, const N64Recomp::Function& func, const std::string& recomp_include, const std::filesystem::path& output_path, std::span<std::vector<uint32_t>> static_funcs_out) {
bool recompile_single_function(const N64Recomp::Context& context, size_t func_index, const std::string& recomp_include, const std::filesystem::path& output_path, std::span<std::vector<uint32_t>> static_funcs_out) {
// Open the temporary output file
std::filesystem::path temp_path = output_path;
temp_path.replace_extension(".tmp");
@@ -127,7 +127,7 @@ bool recompile_single_function(const N64Recomp::Context& context, const N64Recom
"\n",
recomp_include);
if (!N64Recomp::recompile_function(context, func, output_file, static_funcs_out, false)) {
if (!N64Recomp::recompile_function(context, func_index, output_file, static_funcs_out, false)) {
return false;
}
@@ -725,7 +725,7 @@ int main(int argc, char** argv) {
// Recompile the function.
if (config.single_file_output || config.functions_per_output_file > 1) {
result = N64Recomp::recompile_function(context, func, current_output_file, static_funcs_by_section, false);
result = N64Recomp::recompile_function(context, i, current_output_file, static_funcs_by_section, false);
if (!config.single_file_output) {
cur_file_function_count++;
if (cur_file_function_count >= config.functions_per_output_file) {
@@ -734,7 +734,7 @@ int main(int argc, char** argv) {
}
}
else {
result = recompile_single_function(context, func, config.recomp_include, config.output_func_path / (func.name + ".c"), static_funcs_by_section);
result = recompile_single_function(context, i, config.recomp_include, config.output_func_path / (func.name + ".c"), static_funcs_by_section);
}
if (result == false) {
fmt::print(stderr, "Error recompiling {}\n", func.name);
@@ -797,22 +797,25 @@ int main(int argc, char** argv) {
std::vector<uint32_t> insn_words((cur_func_end - static_func_addr) / sizeof(uint32_t));
insn_words.assign(func_rom_start, func_rom_start + insn_words.size());
N64Recomp::Function func {
// Create the new function and add it to the context.
size_t new_func_index = context.functions.size();
context.functions.emplace_back(
static_func_addr,
rom_addr,
std::move(insn_words),
fmt::format("static_{}_{:08X}", section_index, static_func_addr),
static_cast<uint16_t>(section_index),
false
};
);
const N64Recomp::Function& new_func = context.functions[new_func_index];
fmt::print(func_header_file,
"void {}(uint8_t* rdram, recomp_context* ctx);\n", func.name);
"void {}(uint8_t* rdram, recomp_context* ctx);\n", new_func.name);
bool result;
size_t prev_num_statics = static_funcs_by_section[func.section_index].size();
size_t prev_num_statics = static_funcs_by_section[new_func.section_index].size();
if (config.single_file_output || config.functions_per_output_file > 1) {
result = N64Recomp::recompile_function(context, func, current_output_file, static_funcs_by_section, false);
result = N64Recomp::recompile_function(context, new_func_index, current_output_file, static_funcs_by_section, false);
if (!config.single_file_output) {
cur_file_function_count++;
if (cur_file_function_count >= config.functions_per_output_file) {
@@ -821,14 +824,14 @@ int main(int argc, char** argv) {
}
}
else {
result = recompile_single_function(context, func, config.recomp_include, config.output_func_path / (func.name + ".c"), static_funcs_by_section);
result = recompile_single_function(context, new_func_index, config.recomp_include, config.output_func_path / (new_func.name + ".c"), static_funcs_by_section);
}
// Add any new static functions that were found while recompiling this one.
size_t cur_num_statics = static_funcs_by_section[func.section_index].size();
size_t cur_num_statics = static_funcs_by_section[new_func.section_index].size();
if (cur_num_statics != prev_num_statics) {
for (size_t new_static_index = prev_num_statics; new_static_index < cur_num_statics; new_static_index++) {
uint32_t new_static_vram = static_funcs_by_section[func.section_index][new_static_index];
uint32_t new_static_vram = static_funcs_by_section[new_func.section_index][new_static_index];
if (!statics_set.contains(new_static_vram)) {
statics_set.emplace(new_static_vram);
@@ -838,7 +841,7 @@ int main(int argc, char** argv) {
}
if (result == false) {
fmt::print(stderr, "Error recompiling {}\n", func.name);
fmt::print(stderr, "Error recompiling {}\n", new_func.name);
std::exit(EXIT_FAILURE);
}
}

@@ -1,6 +1,6 @@
#include <cstring>
#include "n64recomp.h"
#include "recompiler/context.h"
struct FileHeader {
char magic[8]; // N64RSYMS

@@ -1,4 +1,4 @@
#include "operations.h"
#include "recompiler/operations.h"
namespace N64Recomp {
const std::unordered_map<InstrId, UnaryOp> unary_ops {
@@ -12,8 +12,8 @@ namespace N64Recomp {
// Float operations
{ InstrId::cpu_mov_s, { UnaryOpType::None, Operand::Fd, Operand::Fs, true } },
{ InstrId::cpu_mov_d, { UnaryOpType::None, Operand::FdDouble, Operand::FsDouble, true } },
{ InstrId::cpu_neg_s, { UnaryOpType::Negate, Operand::Fd, Operand::Fs, true, true } },
{ InstrId::cpu_neg_d, { UnaryOpType::Negate, Operand::FdDouble, Operand::FsDouble, true, true } },
{ InstrId::cpu_neg_s, { UnaryOpType::NegateFloat, Operand::Fd, Operand::Fs, true, true } },
{ InstrId::cpu_neg_d, { UnaryOpType::NegateDouble, Operand::FdDouble, Operand::FsDouble, true, true } },
{ InstrId::cpu_abs_s, { UnaryOpType::AbsFloat, Operand::Fd, Operand::Fs, true, true } },
{ InstrId::cpu_abs_d, { UnaryOpType::AbsDouble, Operand::FdDouble, Operand::FsDouble, true, true } },
{ InstrId::cpu_sqrt_s, { UnaryOpType::SqrtFloat, Operand::Fd, Operand::Fs, true, true } },
@@ -65,24 +65,22 @@ namespace N64Recomp {
{ InstrId::cpu_ori, { BinaryOpType::Or64, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Rs, Operand::ImmU16 }}} },
{ InstrId::cpu_xori, { BinaryOpType::Xor64, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Rs, Operand::ImmU16 }}} },
// Shifts
/* BUG Should mask after (change op to Sll32 and input op to ToU32) */
{ InstrId::cpu_sllv, { BinaryOpType::Sll64, Operand::Rd, {{ UnaryOpType::ToS32, UnaryOpType::Mask5 }, { Operand::Rt, Operand::Rs }}} },
{ InstrId::cpu_sllv, { BinaryOpType::Sll32, Operand::Rd, {{ UnaryOpType::None, UnaryOpType::Mask5 }, { Operand::Rt, Operand::Rs }}} },
{ InstrId::cpu_dsllv, { BinaryOpType::Sll64, Operand::Rd, {{ UnaryOpType::None, UnaryOpType::Mask6 }, { Operand::Rt, Operand::Rs }}} },
{ InstrId::cpu_srlv, { BinaryOpType::Srl32, Operand::Rd, {{ UnaryOpType::ToU32, UnaryOpType::Mask5 }, { Operand::Rt, Operand::Rs }}} },
{ InstrId::cpu_dsrlv, { BinaryOpType::Srl64, Operand::Rd, {{ UnaryOpType::ToU64, UnaryOpType::Mask6 }, { Operand::Rt, Operand::Rs }}} },
/* BUG Should mask after (change op to Sra32 and input op to ToS64) */
{ InstrId::cpu_srav, { BinaryOpType::Sra64, Operand::Rd, {{ UnaryOpType::ToS32, UnaryOpType::Mask5 }, { Operand::Rt, Operand::Rs }}} },
// Hardware bug: The input is not masked to 32 bits before right shifting, so bits from the upper half of the register will bleed into the lower half.
{ InstrId::cpu_srav, { BinaryOpType::Sra32, Operand::Rd, {{ UnaryOpType::ToS64, UnaryOpType::Mask5 }, { Operand::Rt, Operand::Rs }}} },
{ InstrId::cpu_dsrav, { BinaryOpType::Sra64, Operand::Rd, {{ UnaryOpType::ToS64, UnaryOpType::Mask6 }, { Operand::Rt, Operand::Rs }}} },
// Shifts (immediate)
/* BUG Should mask after (change op to Sll32 and input op to ToU32) */
{ InstrId::cpu_sll, { BinaryOpType::Sll64, Operand::Rd, {{ UnaryOpType::ToS32, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_sll, { BinaryOpType::Sll32, Operand::Rd, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_dsll, { BinaryOpType::Sll64, Operand::Rd, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_dsll32, { BinaryOpType::Sll64, Operand::Rd, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Rt, Operand::Sa32 }}} },
{ InstrId::cpu_srl, { BinaryOpType::Srl32, Operand::Rd, {{ UnaryOpType::ToU32, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_dsrl, { BinaryOpType::Srl64, Operand::Rd, {{ UnaryOpType::ToU64, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_dsrl32, { BinaryOpType::Srl64, Operand::Rd, {{ UnaryOpType::ToU64, UnaryOpType::None }, { Operand::Rt, Operand::Sa32 }}} },
/* BUG should cast after (change op to Sra32 and input op to ToS64) */
{ InstrId::cpu_sra, { BinaryOpType::Sra64, Operand::Rd, {{ UnaryOpType::ToS32, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
// Hardware bug: The input is not masked to 32 bits before right shifting, so bits from the upper half of the register will bleed into the lower half.
{ InstrId::cpu_sra, { BinaryOpType::Sra32, Operand::Rd, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_dsra, { BinaryOpType::Sra64, Operand::Rd, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rt, Operand::Sa }}} },
{ InstrId::cpu_dsra32, { BinaryOpType::Sra64, Operand::Rd, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rt, Operand::Sa32 }}} },
// Comparisons
@@ -101,47 +99,47 @@ namespace N64Recomp {
{ InstrId::cpu_div_s, { BinaryOpType::DivFloat, Operand::Fd, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true, true } },
{ InstrId::cpu_div_d, { BinaryOpType::DivDouble, Operand::FdDouble, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true, true } },
// Float comparisons TODO remaining operations and investigate ordered/unordered and default values
{ InstrId::cpu_c_lt_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_nge_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_olt_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ult_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_lt_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_nge_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_olt_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ult_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_lt_s, { BinaryOpType::LessFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_nge_s, { BinaryOpType::LessFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_olt_s, { BinaryOpType::LessFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ult_s, { BinaryOpType::LessFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_lt_d, { BinaryOpType::LessDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_nge_d, { BinaryOpType::LessDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_olt_d, { BinaryOpType::LessDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ult_d, { BinaryOpType::LessDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_le_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngt_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ole_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ule_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_le_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngt_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ole_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ule_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_le_s, { BinaryOpType::LessEqFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngt_s, { BinaryOpType::LessEqFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ole_s, { BinaryOpType::LessEqFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ule_s, { BinaryOpType::LessEqFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_le_d, { BinaryOpType::LessEqDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngt_d, { BinaryOpType::LessEqDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ole_d, { BinaryOpType::LessEqDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ule_d, { BinaryOpType::LessEqDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_eq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ueq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngl_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_seq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_eq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ueq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngl_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_eq_s, { BinaryOpType::EqualFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ueq_s, { BinaryOpType::EqualFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngl_s, { BinaryOpType::EqualFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_seq_s, { BinaryOpType::EqualFloat, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_eq_d, { BinaryOpType::EqualDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ueq_d, { BinaryOpType::EqualDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngl_d, { BinaryOpType::EqualDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
/* TODO rename to c_seq_d when fixed in rabbitizer */
{ InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_deq_d, { BinaryOpType::EqualDouble, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
// Loads
{ InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}} },
{ InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::ImmS16, Operand::Base }}, true } },
{ InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwu, { BinaryOpType::LWU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lh, { BinaryOpType::LH, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lhu, { BinaryOpType::LHU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lb, { BinaryOpType::LB, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lbu, { BinaryOpType::LBU, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldl, { BinaryOpType::LDL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldr, { BinaryOpType::LDR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwl, { BinaryOpType::LWL, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwr, { BinaryOpType::LWR, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lwc1, { BinaryOpType::LW, Operand::FtU32L, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_ldc1, { BinaryOpType::LD, Operand::FtU64, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}, true } },
};
const std::unordered_map<InstrId, ConditionalBranchOp> conditional_branch_ops {
@@ -159,10 +157,12 @@ namespace N64Recomp {
{ InstrId::cpu_bltzl, { BinaryOpType::Less, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rs, Operand::Zero }}, false, true }},
{ InstrId::cpu_bgezal, { BinaryOpType::GreaterEq, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rs, Operand::Zero }}, true, false }},
{ InstrId::cpu_bgezall, { BinaryOpType::GreaterEq, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rs, Operand::Zero }}, true, true }},
{ InstrId::cpu_bc1f, { BinaryOpType::NotEqual, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, false }},
{ InstrId::cpu_bc1fl, { BinaryOpType::NotEqual, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, true }},
{ InstrId::cpu_bc1t, { BinaryOpType::Equal, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, false }},
{ InstrId::cpu_bc1tl, { BinaryOpType::Equal, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, true }},
{ InstrId::cpu_bltzal, { BinaryOpType::Less, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rs, Operand::Zero }}, true, false }},
{ InstrId::cpu_bltzall, { BinaryOpType::Less, {{ UnaryOpType::ToS64, UnaryOpType::None }, { Operand::Rs, Operand::Zero }}, true, true }},
{ InstrId::cpu_bc1f, { BinaryOpType::Equal, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, false }},
{ InstrId::cpu_bc1fl, { BinaryOpType::Equal, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, true }},
{ InstrId::cpu_bc1t, { BinaryOpType::NotEqual, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, false }},
{ InstrId::cpu_bc1tl, { BinaryOpType::NotEqual, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Cop1cs, Operand::Zero }}, false, true }},
};
const std::unordered_map<InstrId, StoreOp> store_ops {

@@ -8,10 +8,10 @@
#include "fmt/format.h"
#include "fmt/ostream.h"
#include "n64recomp.h"
#include "recompiler/context.h"
#include "analysis.h"
#include "operations.h"
#include "generator.h"
#include "recompiler/operations.h"
#include "recompiler/generator.h"
enum class JalResolutionResult {
NoMatch,
@@ -28,7 +28,6 @@ JalResolutionResult resolve_jal(const N64Recomp::Context& context, size_t cur_se
uint32_t section_vram_start = cur_section.ram_addr;
uint32_t section_vram_end = cur_section.ram_addr + cur_section.size;
bool in_current_section = target_func_vram >= section_vram_start && target_func_vram < section_vram_end;
bool needs_static = false;
bool exact_match_found = false;
// Use a thread local to prevent reallocation across runs and to allow multi-threading in the future.
@@ -109,8 +108,8 @@ std::string_view ctx_gpr_prefix(int reg) {
return "";
}
// Major TODO, this function grew very organically and needs to be cleaned up. Ideally, it'll get split up into some sort of lookup table grouped by similar instruction types.
bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set<uint32_t>& skipped_insns, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span<std::vector<uint32_t>> static_funcs_out) {
template <typename GeneratorType>
bool process_instruction(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set<uint32_t>& jtbl_lw_instructions, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ostream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span<std::vector<uint32_t>> static_funcs_out) {
using namespace N64Recomp;
const auto& section = context.sections[func.section_index];
@@ -118,6 +117,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
needs_link_branch = false;
is_branch_likely = false;
uint32_t instr_vram = instr.getVram();
InstrId instr_id = instr.getUniqueId();
auto print_indent = [&]() {
fmt::print(output_file, " ");
@@ -132,16 +132,20 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
// Output a comment with the original instruction
if (instr.isBranch() || instr.getUniqueId() == InstrId::cpu_j) {
fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())));
} else if (instr.getUniqueId() == InstrId::cpu_jal) {
fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric())));
print_indent();
if (instr.isBranch() || instr_id == InstrId::cpu_j) {
generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric()))));
} else if (instr_id == InstrId::cpu_jal) {
generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric()))));
} else {
fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0));
generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0)));
}
if (skipped_insns.contains(instr_vram)) {
return true;
// Replace loads for jump table entries with addiu. This leaves the jump table entry's address in the output register
// instead of the entry's value, which can then be used to determine the offset from the start of the jump table.
if (jtbl_lw_instructions.contains(instr_vram)) {
assert(instr_id == InstrId::cpu_lw);
instr_id = InstrId::cpu_addiu;
}
N64Recomp::RelocType reloc_type = N64Recomp::RelocType::R_MIPS_NONE;
@@ -178,9 +182,9 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// Don't try to relocate special section symbols.
if (context.is_regular_reference_section(reloc.target_section) || reloc_section == N64Recomp::SectionAbsolute) {
bool ref_section_relocatable = context.is_reference_section_relocatable(reloc.target_section);
uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
// Resolve HI16 and LO16 reference symbol relocs to non-relocatable sections by patching the instruction immediate.
if (!ref_section_relocatable && (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16)) {
uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
uint32_t full_immediate = reloc.target_section_offset + ref_section_vram;
if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16) {
@@ -206,13 +210,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
}
auto print_line = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
print_indent();
fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...));
fmt::print(output_file, ";\n");
};
auto print_unconditional_branch = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
auto process_delay_slot = [&](bool use_indent) {
if (instr_index < instructions.size() - 1) {
bool dummy_needs_link_branch;
bool dummy_is_branch_likely;
@@ -221,56 +219,87 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
next_reloc_index++;
}
if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
if (!process_instruction(generator, context, func, stats, jtbl_lw_instructions, instr_index + 1, instructions, output_file, use_indent, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
return false;
}
}
print_indent();
fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...));
if (needs_link_branch) {
fmt::print(output_file, ";\n goto after_{};\n", link_branch_index);
} else {
fmt::print(output_file, ";\n");
}
return true;
};
auto print_func_call = [reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_unconditional_branch]
(uint32_t target_func_vram, bool link_branch = true, bool indent = false)
auto print_link_branch = [&]() {
if (needs_link_branch) {
print_indent();
generator.emit_goto(fmt::format("after_{}", link_branch_index));
}
};
auto print_return_with_delay_slot = [&]() {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_return();
print_link_branch();
return true;
};
auto print_goto_with_delay_slot = [&](const std::string& target) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_goto(target);
print_link_branch();
return true;
};
auto print_func_call_by_register = [&](int reg) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_function_call_by_register(reg);
print_link_branch();
return true;
};
auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &print_link_branch]
(uint32_t target_func_vram, bool tail_call = false, bool indent = false)
{
bool call_by_lookup = false;
bool call_by_name = false;
// Event symbol, emit a call to the runtime to trigger this event.
if (reloc_section == N64Recomp::SectionEvent) {
needs_link_branch = link_branch;
needs_link_branch = !tail_call;
if (indent) {
if (!print_unconditional_branch(" recomp_trigger_event(rdram, ctx, base_event_index + {})", reloc_reference_symbol)) {
return false;
}
} else {
if (!print_unconditional_branch("recomp_trigger_event(rdram, ctx, base_event_index + {})", reloc_reference_symbol)) {
return false;
}
print_indent();
}
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_trigger_event((uint32_t)reloc_reference_symbol);
print_link_branch();
}
// Normal symbol or reference symbol,
else {
std::string jal_target_name{};
size_t matched_func_index = (size_t)-1;
if (reloc_reference_symbol != (size_t)-1) {
const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
if (reloc_type != N64Recomp::RelocType::R_MIPS_26) {
fmt::print(stderr, "Unsupported reloc type {} on jal instruction in {}\n", (int)reloc_type, func.name);
return false;
}
if (ref_symbol.section_offset != reloc_target_section_offset) {
fmt::print(stderr, "Function {} uses a MIPS_R_26 addend, which is not supported yet\n", func.name);
return false;
if (!context.skip_validating_reference_symbols) {
const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
if (ref_symbol.section_offset != reloc_target_section_offset) {
fmt::print(stderr, "Function {} uses a MIPS_R_26 addend, which is not supported yet\n", func.name);
return false;
}
}
jal_target_name = ref_symbol.name;
}
else {
size_t matched_func_index = 0;
JalResolutionResult jal_result = resolve_jal(context, func.section_index, target_func_vram, matched_func_index);
switch (jal_result) {
@@ -284,65 +313,78 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// Create a static function and add it to the static function list for this section.
jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram);
static_funcs_out[func.section_index].push_back(target_func_vram);
call_by_name = true;
break;
case JalResolutionResult::Ambiguous:
fmt::print(stderr, "[Info] Ambiguous jal target 0x{:08X} in function {}, falling back to function lookup\n", target_func_vram, func.name);
// Relocation isn't necessary for jumps inside a relocatable section, as this code path will never run if the target vram
// is in the current function's section (see the branch for `in_current_section` above).
// If a game ever needs to jump between multiple relocatable sections, relocation will be necessary here.
jal_target_name = fmt::format("LOOKUP_FUNC(0x{:08X})", target_func_vram);
call_by_lookup = true;
break;
case JalResolutionResult::Error:
fmt::print(stderr, "Internal error when resolving jal to address 0x{:08X} in function {}. Please report this issue.\n", target_func_vram, func.name);
return false;
}
}
needs_link_branch = link_branch;
needs_link_branch = !tail_call;
if (indent) {
if (!print_unconditional_branch(" {}(rdram, ctx)", jal_target_name)) {
return false;
}
} else {
if (!print_unconditional_branch("{}(rdram, ctx)", jal_target_name)) {
return false;
}
print_indent();
}
if (!process_delay_slot(false)) {
return false;
}
print_indent();
if (reloc_reference_symbol != (size_t)-1) {
generator.emit_function_call_reference_symbol(context, reloc_section, reloc_reference_symbol, reloc_target_section_offset);
}
else if (call_by_lookup) {
generator.emit_function_call_lookup(target_func_vram);
}
else if (call_by_name) {
generator.emit_named_function_call(jal_target_name);
}
else {
generator.emit_function_call(context, matched_func_index);
}
print_link_branch();
}
return true;
};
auto print_branch = [&](uint32_t branch_target) {
// If the branch target is outside the current function, check if it can be treated as a tail call.
if (branch_target < func.vram || branch_target >= func_vram_end) {
// If the branch target is the start of some known function, this can be handled as a tail call.
// FIXME: how to deal with static functions?
if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target);
if (!print_func_call(branch_target, false, true)) {
if (!print_func_call_by_address(branch_target, true, true)) {
return false;
}
print_line(" return");
fmt::print(output_file, " }}\n");
print_indent();
generator.emit_return();
// TODO check if this branch close should exist.
// print_indent();
// generator.emit_branch_close();
return true;
}
fmt::print(stderr, "[Warn] Function {} is branching outside of the function (to 0x{:08X})\n", func.name, branch_target);
}
if (instr_index < instructions.size() - 1) {
bool dummy_needs_link_branch;
bool dummy_is_branch_likely;
size_t next_reloc_index = reloc_index;
uint32_t next_vram = instr_vram + 4;
if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
next_reloc_index++;
}
if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
return false;
}
if (!process_delay_slot(true)) {
return false;
}
fmt::print(output_file, " goto L_{:08X};\n", branch_target);
print_indent();
print_indent();
generator.emit_goto(fmt::format("L_{:08X}", branch_target));
// TODO check if this link branch ever exists.
if (needs_link_branch) {
fmt::print(output_file, " goto after_{};\n", link_branch_index);
print_indent();
print_indent();
generator.emit_goto(fmt::format("after_{}", link_branch_index));
}
return true;
};
@@ -353,7 +395,6 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
int rd = (int)instr.GetO32_rd();
int rs = (int)instr.GetO32_rs();
int base = rs;
int rt = (int)instr.GetO32_rt();
int sa = (int)instr.Get_sa();
@@ -365,7 +406,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
bool handled = true;
switch (instr.getUniqueId()) {
switch (instr_id) {
case InstrId::cpu_nop:
fmt::print(output_file, "\n");
break;
@@ -375,7 +416,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
Cop0Reg reg = instr.Get_cop0d();
switch (reg) {
case Cop0Reg::COP0_Status:
print_line("{}{} = cop0_status_read(ctx)", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop0_status_read(rt);
break;
default:
fmt::print(stderr, "Unhandled cop0 register in mfc0: {}\n", (int)reg);
@@ -388,7 +430,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
Cop0Reg reg = instr.Get_cop0d();
switch (reg) {
case Cop0Reg::COP0_Status:
print_line("cop0_status_write(ctx, {}{})", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop0_status_write(rt);
break;
default:
fmt::print(stderr, "Unhandled cop0 register in mtc0: {}\n", (int)reg);
@@ -408,38 +451,25 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// If so, create a temp to preserve the addend register's value
if (find_result != stats.jump_tables.end()) {
const N64Recomp::JumpTable& cur_jtbl = *find_result;
print_line("gpr jr_addend_{:08X} = {}{}", cur_jtbl.jr_vram, ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg);
print_indent();
generator.emit_jtbl_addend_declaration(cur_jtbl, cur_jtbl.addend_reg);
}
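// (For the C generator, emit_jtbl_addend_declaration writes a snapshot of the shape
// `gpr jr_addend_80012345 = ctx->r2;`, with address and register illustrative, so the
// switch at the jr still sees the addend even if the register is overwritten in between.)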
}
break;
case InstrId::cpu_mult:
print_line("result = S64(S32({}{})) * S64(S32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_dmult:
print_line("DMULT(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_multu:
print_line("result = U64(U32({}{})) * U64(U32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_dmultu:
print_line("DMULTU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_div:
// Cast to 64 bits before division to prevent an arithmetic exception for s32(0x80000000) / -1
print_line("lo = S32(S64(S32({}{})) / S64(S32({}{}))); hi = S32(S64(S32({}{})) % S64(S32({}{})))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_ddiv:
print_line("DDIV(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_divu:
print_line("lo = S32(U32({}{}) / U32({}{})); hi = S32(U32({}{}) % U32({}{}))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_ddivu:
print_line("DDIVU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_muldiv(instr_id, rs, rt);
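// For illustration (not generator output): promoting to 64 bits makes the
// INT32_MIN / -1 case of cpu_div well defined, e.g.
//   int64_t q = (int64_t)INT32_MIN / (int64_t)-1; // 2147483648, no trap
//   lo = (int32_t)q;                              // truncated back to 32 bits
// whereas a direct 32-bit division overflows and raises SIGFPE on x86.
// Division by zero remains a separate hazard.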
break;
// Branches
case InstrId::cpu_jal:
if (!print_func_call(instr.getBranchVramGeneric())) {
if (!print_func_call_by_address(instr.getBranchVramGeneric())) {
return false;
}
break;
@@ -450,18 +480,19 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
return false;
}
needs_link_branch = true;
print_unconditional_branch("LOOKUP_FUNC({}{})(rdram, ctx)", ctx_gpr_prefix(rs), rs);
print_func_call_by_register(rs);
break;
case InstrId::cpu_j:
case InstrId::cpu_b:
{
uint32_t branch_target = instr.getBranchVramGeneric();
if (branch_target == instr_vram) {
print_line("pause_self(rdram)");
print_indent();
generator.emit_pause_self();
}
// Check if the branch is within this function
else if (branch_target >= func.vram && branch_target < func_vram_end) {
print_unconditional_branch("goto L_{:08X}", branch_target);
print_goto_with_delay_slot(fmt::format("L_{:08X}", branch_target));
}
// This may be a tail call in the middle of the control flow due to a previous check
// For example:
@@ -476,11 +507,12 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// ```
// FIXME: how to deal with static functions?
else if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target);
if (!print_func_call(branch_target, false)) {
fmt::print("[Info] Tail call in {} to 0x{:08X}\n", func.name, branch_target);
if (!print_func_call_by_address(branch_target, true)) {
return false;
}
print_line("return");
print_indent();
generator.emit_return();
}
else {
fmt::print(stderr, "Unhandled branch in {} at 0x{:08X} to 0x{:08X}\n", func.name, instr_vram, branch_target);
@@ -490,7 +522,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
break;
case InstrId::cpu_jr:
if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
print_unconditional_branch("return");
print_return_with_delay_slot();
} else {
auto jtbl_find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const N64Recomp::JumpTable& jtbl) {
@@ -499,58 +531,41 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (jtbl_find_result != stats.jump_tables.end()) {
const N64Recomp::JumpTable& cur_jtbl = *jtbl_find_result;
bool dummy_needs_link_branch, dummy_is_branch_likely;
size_t next_reloc_index = reloc_index;
uint32_t next_vram = instr_vram + 4;
if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
next_reloc_index++;
}
if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram);
generator.emit_switch(context, cur_jtbl, rs);
for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
print_indent();
print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]);
print_indent();
generator.emit_case(entry_index, fmt::format("L_{:08X}", cur_jtbl.entries[entry_index]));
}
print_indent();
print_line("default: switch_error(__func__, 0x{:08X}, 0x{:08X})", instr_vram, cur_jtbl.vram);
print_indent();
fmt::print(output_file, "}}\n");
generator.emit_switch_error(instr_vram, cur_jtbl.vram);
print_indent();
generator.emit_switch_close();
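// For the C generator the emitted switch keeps the old inline shape, e.g.
// (addresses illustrative):
//   switch (jr_addend_80012345 >> 2) {
//       case 0: goto L_80012360; break;
//       default: switch_error(__func__, 0x8001235C, 0x80012400);
//   }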
break;
}
auto jump_find_result = std::find_if(stats.absolute_jumps.begin(), stats.absolute_jumps.end(),
[instr_vram](const N64Recomp::AbsoluteJump& jump) {
return jump.instruction_vram == instr_vram;
});
if (jump_find_result != stats.absolute_jumps.end()) {
print_unconditional_branch("LOOKUP_FUNC({})(rdram, ctx)", (uint64_t)(int32_t)jump_find_result->jump_target);
// jr doesn't link so it acts like a tail call, meaning we should return directly after the jump returns
print_line("return");
break;
}
bool is_tail_call = instr_vram == func_vram_end - 2 * sizeof(func.words[0]);
if (is_tail_call) {
fmt::print("Indirect tail call in {}\n", func.name);
print_unconditional_branch("LOOKUP_FUNC({}{})(rdram, ctx)", ctx_gpr_prefix(rs), rs);
print_line("return");
break;
}
fmt::print(stderr, "No jump table found for jr at 0x{:08X} and not tail call\n", instr_vram);
fmt::print("[Info] Indirect tail call in {}\n", func.name);
print_func_call_by_register(rs);
print_indent();
generator.emit_return();
break;
}
break;
case InstrId::cpu_syscall:
print_line("recomp_syscall_handler(rdram, ctx, 0x{:08X})", instr_vram);
print_indent();
generator.emit_syscall(instr_vram);
// syscalls don't link, so treat them like tail calls
print_line("return");
print_indent();
generator.emit_return();
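// In C output this case lowers to the handler call followed by an immediate
// return, matching the inline prints it replaces (vram illustrative):
//   recomp_syscall_handler(rdram, ctx, 0x80012344);
//   return;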
break;
case InstrId::cpu_break:
print_line("do_break({})", instr_vram);
print_indent();
generator.emit_do_break(instr_vram);
break;
// Cop1 rounding mode
@@ -559,21 +574,22 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
fmt::print(stderr, "Invalid FP control register for ctc1: {}\n", cop1_cs);
return false;
}
print_line("rounding_mode = ({}{}) & 0x3", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop1_cs_write(rt);
break;
case InstrId::cpu_cfc1:
if (cop1_cs != 31) {
fmt::print(stderr, "Invalid FP control register for cfc1: {}\n", cop1_cs);
return false;
}
print_line("{}{} = rounding_mode", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop1_cs_read(rt);
break;
default:
handled = false;
break;
}
CGenerator generator{};
InstructionContext instruction_context{};
instruction_context.rd = rd;
instruction_context.rs = rs;
@@ -589,28 +605,28 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
instruction_context.reloc_section_index = reloc_section;
instruction_context.reloc_target_section_offset = reloc_target_section_offset;
auto do_check_fr = [](std::ostream& output_file, const CGenerator& generator, const InstructionContext& ctx, Operand operand) {
auto do_check_fr = [](const GeneratorType& generator, const InstructionContext& ctx, Operand operand) {
switch (operand) {
case Operand::Fd:
case Operand::FdDouble:
case Operand::FdU32L:
case Operand::FdU32H:
case Operand::FdU64:
generator.emit_check_fr(output_file, ctx.fd);
generator.emit_check_fr(ctx.fd);
break;
case Operand::Fs:
case Operand::FsDouble:
case Operand::FsU32L:
case Operand::FsU32H:
case Operand::FsU64:
generator.emit_check_fr(output_file, ctx.fs);
generator.emit_check_fr(ctx.fs);
break;
case Operand::Ft:
case Operand::FtDouble:
case Operand::FtU32L:
case Operand::FtU32H:
case Operand::FtU64:
generator.emit_check_fr(output_file, ctx.ft);
generator.emit_check_fr(ctx.ft);
break;
default:
// No MIPS3 float check needed for non-float operands.
@@ -618,25 +634,25 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
};
auto do_check_nan = [](std::ostream& output_file, const CGenerator& generator, const InstructionContext& ctx, Operand operand) {
auto do_check_nan = [](const GeneratorType& generator, const InstructionContext& ctx, Operand operand) {
switch (operand) {
case Operand::Fd:
generator.emit_check_nan(output_file, ctx.fd, false);
generator.emit_check_nan(ctx.fd, false);
break;
case Operand::Fs:
generator.emit_check_nan(output_file, ctx.fs, false);
generator.emit_check_nan(ctx.fs, false);
break;
case Operand::Ft:
generator.emit_check_nan(output_file, ctx.ft, false);
generator.emit_check_nan(ctx.ft, false);
break;
case Operand::FdDouble:
generator.emit_check_nan(output_file, ctx.fd, true);
generator.emit_check_nan(ctx.fd, true);
break;
case Operand::FsDouble:
generator.emit_check_nan(output_file, ctx.fs, true);
generator.emit_check_nan(ctx.fs, true);
break;
case Operand::FtDouble:
generator.emit_check_nan(output_file, ctx.ft, true);
generator.emit_check_nan(ctx.ft, true);
break;
default:
// No NaN checks needed for non-float operands.
@@ -644,54 +660,58 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
};
auto find_binary_it = binary_ops.find(instr.getUniqueId());
auto find_binary_it = binary_ops.find(instr_id);
if (find_binary_it != binary_ops.end()) {
print_indent();
const BinaryOp& op = find_binary_it->second;
if (op.check_fr) {
do_check_fr(output_file, generator, instruction_context, op.output);
do_check_fr(output_file, generator, instruction_context, op.operands.operands[0]);
do_check_fr(output_file, generator, instruction_context, op.operands.operands[1]);
do_check_fr(generator, instruction_context, op.output);
do_check_fr(generator, instruction_context, op.operands.operands[0]);
do_check_fr(generator, instruction_context, op.operands.operands[1]);
}
if (op.check_nan) {
do_check_nan(output_file, generator, instruction_context, op.operands.operands[0]);
do_check_nan(output_file, generator, instruction_context, op.operands.operands[1]);
fmt::print(output_file, "\n ");
do_check_nan(generator, instruction_context, op.operands.operands[0]);
do_check_nan(generator, instruction_context, op.operands.operands[1]);
fmt::print(output_file, "\n");
print_indent();
}
generator.process_binary_op(output_file, op, instruction_context);
generator.process_binary_op(op, instruction_context);
handled = true;
}
auto find_unary_it = unary_ops.find(instr.getUniqueId());
auto find_unary_it = unary_ops.find(instr_id);
if (find_unary_it != unary_ops.end()) {
print_indent();
const UnaryOp& op = find_unary_it->second;
if (op.check_fr) {
do_check_fr(output_file, generator, instruction_context, op.output);
do_check_fr(output_file, generator, instruction_context, op.input);
do_check_fr(generator, instruction_context, op.output);
do_check_fr(generator, instruction_context, op.input);
}
if (op.check_nan) {
do_check_nan(output_file, generator, instruction_context, op.input);
fmt::print(output_file, "\n ");
do_check_nan(generator, instruction_context, op.input);
fmt::print(output_file, "\n");
print_indent();
}
generator.process_unary_op(output_file, op, instruction_context);
generator.process_unary_op(op, instruction_context);
handled = true;
}
auto find_conditional_branch_it = conditional_branch_ops.find(instr.getUniqueId());
auto find_conditional_branch_it = conditional_branch_ops.find(instr_id);
if (find_conditional_branch_it != conditional_branch_ops.end()) {
print_indent();
generator.emit_branch_condition(output_file, find_conditional_branch_it->second, instruction_context);
// TODO combining the branch condition and branch target into one generator call would allow better optimization in the runtime's JIT generator.
// This would require splitting into a conditional jump method and conditional function call method.
generator.emit_branch_condition(find_conditional_branch_it->second, instruction_context);
print_indent();
if (find_conditional_branch_it->second.link) {
if (!print_func_call(instr.getBranchVramGeneric())) {
if (!print_func_call_by_address(instr.getBranchVramGeneric())) {
return false;
}
}
@@ -701,22 +721,23 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
}
generator.emit_branch_close(output_file);
print_indent();
generator.emit_branch_close();
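// Taken together: emit_branch_condition opens the conditional, the delay slot and
// the call or goto are emitted inside it, and emit_branch_close closes it. For the
// C generator the shape is roughly (condition and target illustrative):
//   if (...) {
//       ...;             // delay-slot instruction
//       goto L_80012340;
//   }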
is_branch_likely = find_conditional_branch_it->second.likely;
handled = true;
}
auto find_store_it = store_ops.find(instr.getUniqueId());
auto find_store_it = store_ops.find(instr_id);
if (find_store_it != store_ops.end()) {
print_indent();
const StoreOp& op = find_store_it->second;
if (op.type == StoreOpType::SDC1) {
do_check_fr(output_file, generator, instruction_context, op.value_input);
do_check_fr(generator, instruction_context, op.value_input);
}
generator.process_store_op(output_file, op, instruction_context);
generator.process_store_op(op, instruction_context);
handled = true;
}
@@ -727,23 +748,20 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// TODO is this used?
if (emit_link_branch) {
fmt::print(output_file, " after_{}:\n", link_branch_index);
print_indent();
generator.emit_label(fmt::format("after_{}", link_branch_index));
}
return true;
}
bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
template <typename GeneratorType>
bool recompile_function_impl(GeneratorType& generator, const N64Recomp::Context& context, size_t func_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
const N64Recomp::Function& func = context.functions[func_index];
//fmt::print("Recompiling {}\n", func.name);
std::vector<rabbitizer::InstructionCpu> instructions;
fmt::print(output_file,
"RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n"
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
" uint64_t hi = 0, lo = 0, result = 0;\n"
" unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n"
" int c1cs = 0;\n", // cop1 conditional signal
func.name);
generator.emit_function_start(func.name, func_index);
if (context.trace_mode) {
fmt::print(output_file,
@@ -784,11 +802,11 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
return false;
}
std::unordered_set<uint32_t> skipped_insns{};
std::unordered_set<uint32_t> jtbl_lw_instructions{};
// Add jump table labels into function
for (const auto& jtbl : stats.jump_tables) {
skipped_insns.insert(jtbl.lw_vram);
jtbl_lw_instructions.insert(jtbl.lw_vram);
for (uint32_t jtbl_entry : jtbl.entries) {
branch_labels.insert(jtbl_entry);
}
@@ -808,11 +826,11 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
bool is_branch_likely = false;
// If we're in the delay slot of a likely instruction, emit a goto to skip the instruction before any labels
if (in_likely_delay_slot) {
fmt::print(output_file, " goto skip_{};\n", num_likely_branches);
generator.emit_goto(fmt::format("skip_{}", num_likely_branches));
}
// If there are any other branch labels to insert and we're at the next one, insert it
if (cur_label != branch_labels.end() && vram >= *cur_label) {
fmt::print(output_file, "L_{:08X}:\n", *cur_label);
generator.emit_label(fmt::format("L_{:08X}", *cur_label));
++cur_label;
}
@@ -822,7 +840,7 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
}
// Process the current instruction and check for errors
if (process_instruction(context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) {
if (process_instruction(generator, context, func, stats, jtbl_lw_instructions, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) {
fmt::print(stderr, "Error in recompiling {}, clearing output file\n", func.name);
output_file.clear();
return false;
@@ -833,7 +851,8 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
}
// Now that the instruction has been processed, emit a skip label for the likely branch if needed
if (in_likely_delay_slot) {
fmt::print(output_file, " skip_{}:\n", num_likely_branches);
fmt::print(output_file, " ");
generator.emit_label(fmt::format("skip_{}", num_likely_branches));
num_likely_branches++;
}
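// Emitted shape around a branch-likely (illustrative): the delay-slot instruction
// runs inside the conditional on the taken path, and the fall-through path hops
// over its copy in linear order:
//   if (...) { <delay slot>; goto L_80012340; }
//   goto skip_0;
//   <delay slot, linear copy>
//   skip_0: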
// Mark the next instruction as being in a likely delay slot if the
@@ -844,7 +863,17 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
}
// Terminate the function
fmt::print(output_file, ";}}\n");
generator.emit_function_end();
return true;
}
// Wrap the templated function with CGenerator as the template parameter.
bool N64Recomp::recompile_function(const N64Recomp::Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
CGenerator generator{output_file};
return recompile_function_impl(generator, context, function_index, output_file, static_funcs_out, tag_reference_relocs);
}
bool N64Recomp::recompile_function_custom(Generator& generator, const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
return recompile_function_impl(generator, context, function_index, output_file, static_funcs_out, tag_reference_relocs);
}
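// Usage sketch for the new custom entry point (hypothetical caller; LiveGenerator
// stands in for whatever runtime backend implements N64Recomp::Generator, and a
// text-based generator would receive a real stream instead of the dummy):
//   std::ostringstream dummy;
//   LiveGenerator generator{};
//   for (size_t i = 0; i < context.functions.size(); i++) {
//       if (!N64Recomp::recompile_function_custom(generator, context, i, dummy,
//                                                 static_funcs, tag_reference_relocs)) {
//           return false;
//       }
//   }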


@@ -1,4 +1,4 @@
#include "n64recomp.h"
#include "recompiler/context.h"
const std::unordered_set<std::string> N64Recomp::reimplemented_funcs {
// OS initialize functions