Moved remaining instruction processing text generation into Generator class

This commit is contained in:
Mr-Wiseguy 2024-09-28 23:58:25 -04:00
parent 0d0e93e979
commit 46ab6f758d
5 changed files with 332 additions and 151 deletions

View file

@ -30,10 +30,33 @@ namespace N64Recomp {
virtual void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const = 0;
virtual void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const = 0;
virtual void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const = 0;
virtual void emit_function_start(std::ostream& output_file, const std::string& function_name) const = 0;
virtual void emit_function_end(std::ostream& output_file) const = 0;
virtual void emit_function_call_lookup(std::ostream& output_file, uint32_t addr) const = 0;
virtual void emit_function_call_by_register(std::ostream& output_file, int reg) const = 0;
virtual void emit_function_call_by_name(std::ostream& output_file, const std::string& func_name) const = 0;
virtual void emit_goto(std::ostream& output_file, const std::string& target) const = 0;
virtual void emit_label(std::ostream& output_file, const std::string& label_name) const = 0;
virtual void emit_variable_declaration(std::ostream& output_file, const std::string& var_name, int reg) const = 0;
virtual void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0;
virtual void emit_branch_close(std::ostream& output_file) const = 0;
virtual void emit_switch(std::ostream& output_file, const std::string& jump_variable, int shift_amount) const = 0;
virtual void emit_case(std::ostream& output_file, int case_index, const std::string& target_label) const = 0;
virtual void emit_switch_error(std::ostream& output_file, uint32_t instr_vram, uint32_t jtbl_vram) const = 0;
virtual void emit_switch_close(std::ostream& output_file) const = 0;
virtual void emit_return(std::ostream& output_file) const = 0;
virtual void emit_check_fr(std::ostream& output_file, int fpr) const = 0;
virtual void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const = 0;
virtual void emit_cop0_status_read(std::ostream& output_file, int reg) const = 0;
virtual void emit_cop0_status_write(std::ostream& output_file, int reg) const = 0;
virtual void emit_cop1_cs_read(std::ostream& output_file, int reg) const = 0;
virtual void emit_cop1_cs_write(std::ostream& output_file, int reg) const = 0;
virtual void emit_muldiv(std::ostream& output_file, InstrId instr_id, int reg1, int reg2) const = 0;
virtual void emit_syscall(std::ostream& output_file, uint32_t instr_vram) const = 0;
virtual void emit_do_break(std::ostream& output_file, uint32_t instr_vram) const = 0;
virtual void emit_pause_self(std::ostream& output_file) const = 0;
virtual void emit_trigger_event(std::ostream& output_file, size_t event_index) const = 0;
virtual void emit_comment(std::ostream& output_file, const std::string& comment) const = 0;
};
class CGenerator final : Generator {
@ -42,10 +65,33 @@ namespace N64Recomp {
void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const final;
void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const final;
void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const final;
void emit_function_start(std::ostream& output_file, const std::string& function_name) const final;
void emit_function_end(std::ostream& output_file) const final;
void emit_function_call_lookup(std::ostream& output_file, uint32_t addr) const final;
void emit_function_call_by_register(std::ostream& output_file, int reg) const final;
void emit_function_call_by_name(std::ostream& output_file, const std::string& func_name) const final;
void emit_goto(std::ostream& output_file, const std::string& target) const final;
void emit_label(std::ostream& output_file, const std::string& label_name) const final;
void emit_variable_declaration(std::ostream& output_file, const std::string& var_name, int reg) const final;
void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
void emit_branch_close(std::ostream& output_file) const final;
void emit_switch(std::ostream& output_file, const std::string& jump_variable, int shift_amount) const final;
void emit_case(std::ostream& output_file, int case_index, const std::string& target_label) const final;
void emit_switch_error(std::ostream& output_file, uint32_t instr_vram, uint32_t jtbl_vram) const final;
void emit_switch_close(std::ostream& output_file) const final;
void emit_return(std::ostream& output_file) const final;
void emit_check_fr(std::ostream& output_file, int fpr) const final;
void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const final;
void emit_cop0_status_read(std::ostream& output_file, int reg) const final;
void emit_cop0_status_write(std::ostream& output_file, int reg) const final;
void emit_cop1_cs_read(std::ostream& output_file, int reg) const final;
void emit_cop1_cs_write(std::ostream& output_file, int reg) const final;
void emit_muldiv(std::ostream& output_file, InstrId instr_id, int reg1, int reg2) const final;
void emit_syscall(std::ostream& output_file, uint32_t instr_vram) const final;
void emit_do_break(std::ostream& output_file, uint32_t instr_vram) const final;
void emit_pause_self(std::ostream& output_file) const final;
void emit_trigger_event(std::ostream& output_file, size_t event_index) const final;
void emit_comment(std::ostream& output_file, const std::string& comment) const final;
private:
void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;

View file

@ -196,19 +196,8 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const N64Recom
instr.getVram(),
std::vector<uint32_t>{}
);
} else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) {
uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui;
stats.absolute_jumps.emplace_back(
address,
instr.getVram()
);
}
// Allow tail calls (TODO account for trailing nops due to bad function splits)
else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) {
// Inconclusive analysis
fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name);
return false;
}
// TODO stricter validation on tail calls, since not all indirect jumps can be treated as one.
break;
default:
if (instr.modifiesRd()) {

View file

@ -29,7 +29,6 @@ namespace N64Recomp {
struct FunctionStats {
std::vector<JumpTable> jump_tables;
std::vector<AbsoluteJump> absolute_jumps;
};
bool analyze_function(const Context& context, const Function& function, const std::vector<rabbitizer::InstructionCpu>& instructions, FunctionStats& stats);

View file

@ -365,6 +365,46 @@ void N64Recomp::CGenerator::get_binary_expr_string(BinaryOpType type, const Bina
}
}
void N64Recomp::CGenerator::emit_function_start(std::ostream& output_file, const std::string& function_name) const {
fmt::print(output_file,
"RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n"
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
" uint64_t hi = 0, lo = 0, result = 0;\n"
" unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n"
" int c1cs = 0;\n", // cop1 conditional signal
function_name);
}
void N64Recomp::CGenerator::emit_function_end(std::ostream& output_file) const {
fmt::print(output_file, ";}}\n");
}
void N64Recomp::CGenerator::emit_function_call_lookup(std::ostream& output_file, uint32_t addr) const {
fmt::print(output_file, "LOOKUP_FUNC(0x{:08X})(rdram, ctx);\n", addr);
}
void N64Recomp::CGenerator::emit_function_call_by_register(std::ostream& output_file, int reg) const {
fmt::print(output_file, "LOOKUP_FUNC({})(rdram, ctx);\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_function_call_by_name(std::ostream& output_file, const std::string& func_name) const {
fmt::print(output_file, "{}(rdram, ctx);\n", func_name);
}
void N64Recomp::CGenerator::emit_goto(std::ostream& output_file, const std::string& target) const {
fmt::print(output_file,
" goto {};\n", target);
}
void N64Recomp::CGenerator::emit_label(std::ostream& output_file, const std::string& label_name) const {
fmt::print(output_file,
"{}:\n", label_name);
}
void N64Recomp::CGenerator::emit_variable_declaration(std::ostream& output_file, const std::string& var_name, int reg) const {
fmt::print(output_file, "gpr {} = {};\n", var_name, gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const {
// Thread local variables to prevent allocations when possible.
// TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations.
@ -374,7 +414,27 @@ void N64Recomp::CGenerator::emit_branch_condition(std::ostream& output_file, con
}
void N64Recomp::CGenerator::emit_branch_close(std::ostream& output_file) const {
fmt::print(output_file, " }}\n");
fmt::print(output_file, "}}\n");
}
void N64Recomp::CGenerator::emit_switch_close(std::ostream& output_file) const {
fmt::print(output_file, "}}\n");
}
void N64Recomp::CGenerator::emit_switch(std::ostream& output_file, const std::string& jump_variable, int shift_amount) const {
fmt::print(output_file, "switch ({} >> {}) {{\n", jump_variable, shift_amount);
}
void N64Recomp::CGenerator::emit_case(std::ostream& output_file, int case_index, const std::string& target_label) const {
fmt::print(output_file, "case {}: goto {}; break;\n", case_index, target_label);
}
void N64Recomp::CGenerator::emit_switch_error(std::ostream& output_file, uint32_t instr_vram, uint32_t jtbl_vram) const {
fmt::print(output_file, "default: switch_error(__func__, 0x{:08X}, 0x{:08X});\n", instr_vram, jtbl_vram);
}
void N64Recomp::CGenerator::emit_return(std::ostream& output_file) const {
fmt::print(output_file, "return;\n");
}
void N64Recomp::CGenerator::emit_check_fr(std::ostream& output_file, int fpr) const {
@ -385,6 +445,72 @@ void N64Recomp::CGenerator::emit_check_nan(std::ostream& output_file, int fpr, b
fmt::print(output_file, "NAN_CHECK(ctx->f{}.{}); ", fpr, is_double ? "d" : "fl");
}
void N64Recomp::CGenerator::emit_cop0_status_read(std::ostream& output_file, int reg) const {
fmt::print(output_file, "{} = cop0_status_read(ctx);\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_cop0_status_write(std::ostream& output_file, int reg) const {
fmt::print(output_file, "cop0_status_write(ctx, {})", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_cop1_cs_read(std::ostream& output_file, int reg) const {
fmt::print(output_file, "{} = rounding_mode;\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_cop1_cs_write(std::ostream& output_file, int reg) const {
fmt::print(output_file, "rounding_mode = ({}) & 0x3;\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_muldiv(std::ostream& output_file, InstrId instr_id, int reg1, int reg2) const {
switch (instr_id) {
case InstrId::cpu_mult:
fmt::print(output_file, "result = S64(S32({})) * S64(S32({})); lo = S32(result >> 0); hi = S32(result >> 32);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_dmult:
fmt::print(output_file, "DMULT(S64({}), S64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_multu:
fmt::print(output_file, "result = U64(U32({})) * U64(U32({})); lo = S32(result >> 0); hi = S32(result >> 32);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_dmultu:
fmt::print(output_file, "DMULTU(U64({}), U64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_div:
// Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1
fmt::print(output_file, "lo = S32(S64(S32({0})) / S64(S32({1}))); hi = S32(S64(S32({0})) % S64(S32({1})));\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_ddiv:
fmt::print(output_file, "DDIV(S64({}), S64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_divu:
fmt::print(output_file, "lo = S32(U32({0}) / U32({1})); hi = S32(U32({0}) % U32({1}));\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
case InstrId::cpu_ddivu:
fmt::print(output_file, "DDIVU(U64({}), U64({}), &lo, &hi);\n", gpr_to_string(reg1), gpr_to_string(reg2));
break;
}
}
void N64Recomp::CGenerator::emit_syscall(std::ostream& output_file, uint32_t instr_vram) const {
fmt::print(output_file, "recomp_syscall_handler(rdram, ctx, 0x{:08X});\n", instr_vram);
}
void N64Recomp::CGenerator::emit_do_break(std::ostream& output_file, uint32_t instr_vram) const {
fmt::print(output_file, "do_break({});\n", instr_vram);
}
void N64Recomp::CGenerator::emit_pause_self(std::ostream& output_file) const {
fmt::print(output_file, "pause_self(rdram);\n");
}
void N64Recomp::CGenerator::emit_trigger_event(std::ostream& output_file, size_t event_index) const {
fmt::print(output_file, "recomp_trigger_event(rdram, ctx, base_event_index + {});\n", event_index);
}
void N64Recomp::CGenerator::emit_comment(std::ostream& output_file, const std::string& comment) const {
fmt::print(output_file, "// {}\n", comment);
}
void N64Recomp::CGenerator::process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const {
// Thread local variables to prevent allocations when possible.
// TODO these thread locals probably don't actually help right now, so figure out a better way to prevent allocations.

View file

@ -109,8 +109,8 @@ std::string_view ctx_gpr_prefix(int reg) {
return "";
}
// Major TODO, this function grew very organically and needs to be cleaned up. Ideally, it'll get split up into some sort of lookup table grouped by similar instruction types.
bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set<uint32_t>& skipped_insns, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span<std::vector<uint32_t>> static_funcs_out) {
template <typename GeneratorType>
bool process_instruction(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set<uint32_t>& skipped_insns, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span<std::vector<uint32_t>> static_funcs_out) {
using namespace N64Recomp;
const auto& section = context.sections[func.section_index];
@ -132,12 +132,13 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
// Output a comment with the original instruction
print_indent();
if (instr.isBranch() || instr.getUniqueId() == InstrId::cpu_j) {
fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())));
generator.emit_comment(output_file, fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric()))));
} else if (instr.getUniqueId() == InstrId::cpu_jal) {
fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric())));
generator.emit_comment(output_file, fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric()))));
} else {
fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0));
generator.emit_comment(output_file, fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0)));
}
if (skipped_insns.contains(instr_vram)) {
@ -206,13 +207,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
}
auto print_line = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
print_indent();
fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...));
fmt::print(output_file, ";\n");
};
auto print_unconditional_branch = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
auto process_delay_slot = [&](bool use_indent) {
if (instr_index < instructions.size() - 1) {
bool dummy_needs_link_branch;
bool dummy_is_branch_likely;
@ -221,35 +216,76 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
next_reloc_index++;
}
if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
if (!process_instruction(generator, context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, use_indent, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
return false;
}
}
print_indent();
fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...));
if (needs_link_branch) {
fmt::print(output_file, ";\n goto after_{};\n", link_branch_index);
} else {
fmt::print(output_file, ";\n");
}
return true;
};
auto print_func_call = [reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_unconditional_branch]
(uint32_t target_func_vram, bool link_branch = true, bool indent = false)
auto print_link_branch = [&]() {
if (needs_link_branch) {
print_indent();
generator.emit_goto(output_file, fmt::format("after_{}", link_branch_index));
}
};
auto print_return_with_delay_slot = [&]() {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_return(output_file);
print_link_branch();
return true;
};
auto print_goto_with_delay_slot = [&](const std::string& target) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_goto(output_file, target);
print_link_branch();
return true;
};
auto print_func_call_by_register = [&](int reg) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_function_call_by_register(output_file, reg);
print_link_branch();
return true;
};
auto print_func_call_lookup = [&](uint32_t target_vram) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_function_call_lookup(output_file, target_vram);
print_link_branch();
return true;
};
auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &output_file, &print_link_branch]
(uint32_t target_func_vram, bool tail_call = false, bool indent = false)
{
bool call_by_lookup = false;
// Event symbol, emit a call to the runtime to trigger this event.
if (reloc_section == N64Recomp::SectionEvent) {
needs_link_branch = link_branch;
needs_link_branch = !tail_call;
if (indent) {
if (!print_unconditional_branch(" recomp_trigger_event(rdram, ctx, base_event_index + {})", reloc_reference_symbol)) {
return false;
}
} else {
if (!print_unconditional_branch("recomp_trigger_event(rdram, ctx, base_event_index + {})", reloc_reference_symbol)) {
return false;
}
print_indent();
}
if (!process_delay_slot(false)) {
return false;
}
print_indent();
generator.emit_trigger_event(output_file, reloc_reference_symbol);
print_link_branch();
}
// Normal symbol or reference symbol,
else {
@ -290,59 +326,63 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// Relocation isn't necessary for jumps inside a relocatable section, as this code path will never run if the target vram
// is in the current function's section (see the branch for `in_current_section` above).
// If a game ever needs to jump between multiple relocatable sections, relocation will be necessary here.
jal_target_name = fmt::format("LOOKUP_FUNC(0x{:08X})", target_func_vram);
call_by_lookup = true;
break;
case JalResolutionResult::Error:
fmt::print(stderr, "Internal error when resolving jal to address 0x{:08X} in function {}. Please report this issue.\n", target_func_vram, func.name);
return false;
}
}
needs_link_branch = link_branch;
needs_link_branch = !tail_call;
if (indent) {
if (!print_unconditional_branch(" {}(rdram, ctx)", jal_target_name)) {
return false;
}
} else {
if (!print_unconditional_branch("{}(rdram, ctx)", jal_target_name)) {
return false;
}
print_indent();
}
if (!process_delay_slot(false)) {
return false;
}
print_indent();
if (call_by_lookup) {
generator.emit_function_call_lookup(output_file, target_func_vram);
}
else {
generator.emit_function_call_by_name(output_file, jal_target_name);
}
print_link_branch();
}
return true;
};
auto print_branch = [&](uint32_t branch_target) {
// If the branch target is outside the current function, check if it can be treated as a tail call.
if (branch_target < func.vram || branch_target >= func_vram_end) {
// If the branch target is the start of some known function, this can be handled as a tail call.
// FIXME: how to deal with static functions?
if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target);
if (!print_func_call(branch_target, false, true)) {
if (!print_func_call_by_address(branch_target, true, true)) {
return false;
}
print_line(" return");
fmt::print(output_file, " }}\n");
print_indent();
generator.emit_return(output_file);
print_indent();
generator.emit_branch_close(output_file);
return true;
}
fmt::print(stderr, "[Warn] Function {} is branching outside of the function (to 0x{:08X})\n", func.name, branch_target);
}
if (instr_index < instructions.size() - 1) {
bool dummy_needs_link_branch;
bool dummy_is_branch_likely;
size_t next_reloc_index = reloc_index;
uint32_t next_vram = instr_vram + 4;
if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
next_reloc_index++;
}
if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
return false;
}
if (!process_delay_slot(true)) {
return false;
}
fmt::print(output_file, " goto L_{:08X};\n", branch_target);
print_indent();
print_indent();
generator.emit_goto(output_file, fmt::format("L_{:08X}", branch_target));
if (needs_link_branch) {
fmt::print(output_file, " goto after_{};\n", link_branch_index);
print_indent();
print_indent();
generator.emit_goto(output_file, fmt::format("after_{}", link_branch_index));
}
return true;
};
@ -375,7 +415,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
Cop0Reg reg = instr.Get_cop0d();
switch (reg) {
case Cop0Reg::COP0_Status:
print_line("{}{} = cop0_status_read(ctx)", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop0_status_read(output_file, rt);
break;
default:
fmt::print(stderr, "Unhandled cop0 register in mfc0: {}\n", (int)reg);
@ -388,7 +429,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
Cop0Reg reg = instr.Get_cop0d();
switch (reg) {
case Cop0Reg::COP0_Status:
print_line("cop0_status_write(ctx, {}{})", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop0_status_write(output_file, rt);
break;
default:
fmt::print(stderr, "Unhandled cop0 register in mtc0: {}\n", (int)reg);
@ -408,38 +450,25 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// If so, create a temp to preserve the addend register's value
if (find_result != stats.jump_tables.end()) {
const N64Recomp::JumpTable& cur_jtbl = *find_result;
print_line("gpr jr_addend_{:08X} = {}{}", cur_jtbl.jr_vram, ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg);
print_indent();
generator.emit_variable_declaration(output_file, fmt::format("jr_addend_{:08X}", cur_jtbl.jr_vram), cur_jtbl.addend_reg);
}
}
break;
case InstrId::cpu_mult:
print_line("result = S64(S32({}{})) * S64(S32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_dmult:
print_line("DMULT(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_multu:
print_line("result = U64(U32({}{})) * U64(U32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_dmultu:
print_line("DMULTU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_div:
// Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1
print_line("lo = S32(S64(S32({}{})) / S64(S32({}{}))); hi = S32(S64(S32({}{})) % S64(S32({}{})))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_ddiv:
print_line("DDIV(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_divu:
print_line("lo = S32(U32({}{}) / U32({}{})); hi = S32(U32({}{}) % U32({}{}))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_ddivu:
print_line("DDIVU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_muldiv(output_file, instr.getUniqueId(), rs, rt);
break;
// Branches
case InstrId::cpu_jal:
if (!print_func_call(instr.getBranchVramGeneric())) {
if (!print_func_call_by_address(instr.getBranchVramGeneric())) {
return false;
}
break;
@ -450,18 +479,19 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
return false;
}
needs_link_branch = true;
print_unconditional_branch("LOOKUP_FUNC({}{})(rdram, ctx)", ctx_gpr_prefix(rs), rs);
print_func_call_by_register(rs);
break;
case InstrId::cpu_j:
case InstrId::cpu_b:
{
uint32_t branch_target = instr.getBranchVramGeneric();
if (branch_target == instr_vram) {
print_line("pause_self(rdram)");
print_indent();
generator.emit_pause_self(output_file);
}
// Check if the branch is within this function
else if (branch_target >= func.vram && branch_target < func_vram_end) {
print_unconditional_branch("goto L_{:08X}", branch_target);
print_goto_with_delay_slot(fmt::format("L_{:08X}", branch_target));
}
// This may be a tail call in the middle of the control flow due to a previous check
// For example:
@ -476,11 +506,12 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// ```
// FIXME: how to deal with static functions?
else if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target);
if (!print_func_call(branch_target, false)) {
fmt::print("[Info] Tail call in {} to 0x{:08X}\n", func.name, branch_target);
if (!print_func_call_by_address(branch_target, true)) {
return false;
}
print_line("return");
print_indent();
generator.emit_return(output_file);
}
else {
fmt::print(stderr, "Unhandled branch in {} at 0x{:08X} to 0x{:08X}\n", func.name, instr_vram, branch_target);
@ -490,7 +521,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
break;
case InstrId::cpu_jr:
if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
print_unconditional_branch("return");
print_return_with_delay_slot();
} else {
auto jtbl_find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const N64Recomp::JumpTable& jtbl) {
@ -499,58 +530,41 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (jtbl_find_result != stats.jump_tables.end()) {
const N64Recomp::JumpTable& cur_jtbl = *jtbl_find_result;
bool dummy_needs_link_branch, dummy_is_branch_likely;
size_t next_reloc_index = reloc_index;
uint32_t next_vram = instr_vram + 4;
if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
next_reloc_index++;
}
if (!process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
if (!process_delay_slot(false)) {
return false;
}
print_indent();
fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram);
generator.emit_switch(output_file, fmt::format("jr_addend_{:08X}", cur_jtbl.jr_vram), 2);
for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
print_indent();
print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]);
print_indent();
generator.emit_case(output_file, entry_index, fmt::format("L_{:08X}", cur_jtbl.entries[entry_index]));
}
print_indent();
print_line("default: switch_error(__func__, 0x{:08X}, 0x{:08X})", instr_vram, cur_jtbl.vram);
print_indent();
fmt::print(output_file, "}}\n");
generator.emit_switch_error(output_file, instr_vram, cur_jtbl.vram);
print_indent();
generator.emit_switch_close(output_file);
break;
}
auto jump_find_result = std::find_if(stats.absolute_jumps.begin(), stats.absolute_jumps.end(),
[instr_vram](const N64Recomp::AbsoluteJump& jump) {
return jump.instruction_vram == instr_vram;
});
if (jump_find_result != stats.absolute_jumps.end()) {
print_unconditional_branch("LOOKUP_FUNC({})(rdram, ctx)", (uint64_t)(int32_t)jump_find_result->jump_target);
// jr doesn't link so it acts like a tail call, meaning we should return directly after the jump returns
print_line("return");
break;
}
bool is_tail_call = instr_vram == func_vram_end - 2 * sizeof(func.words[0]);
if (is_tail_call) {
fmt::print("Indirect tail call in {}\n", func.name);
print_unconditional_branch("LOOKUP_FUNC({}{})(rdram, ctx)", ctx_gpr_prefix(rs), rs);
print_line("return");
break;
}
fmt::print(stderr, "No jump table found for jr at 0x{:08X} and not tail call\n", instr_vram);
fmt::print("[Info] Indirect tail call in {}\n", func.name);
print_func_call_by_register(rs);
print_indent();
generator.emit_return(output_file);
break;
}
break;
case InstrId::cpu_syscall:
print_line("recomp_syscall_handler(rdram, ctx, 0x{:08X})", instr_vram);
print_indent();
generator.emit_syscall(output_file, instr_vram);
// syscalls don't link, so treat it like a tail call
print_line("return");
print_indent();
generator.emit_return(output_file);
break;
case InstrId::cpu_break:
print_line("do_break({})", instr_vram);
print_indent();
generator.emit_do_break(output_file, instr_vram);
break;
// Cop1 rounding mode
@ -559,21 +573,22 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
fmt::print(stderr, "Invalid FP control register for ctc1: {}\n", cop1_cs);
return false;
}
print_line("rounding_mode = ({}{}) & 0x3", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop1_cs_write(output_file, rt);
break;
case InstrId::cpu_cfc1:
if (cop1_cs != 31) {
fmt::print(stderr, "Invalid FP control register for cfc1: {}\n", cop1_cs);
return false;
}
print_line("{}{} = rounding_mode", ctx_gpr_prefix(rt), rt);
print_indent();
generator.emit_cop1_cs_read(output_file, rt);
break;
default:
handled = false;
break;
}
CGenerator generator{};
InstructionContext instruction_context{};
instruction_context.rd = rd;
instruction_context.rs = rs;
@ -658,7 +673,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (op.check_nan) {
do_check_nan(output_file, generator, instruction_context, op.operands.operands[0]);
do_check_nan(output_file, generator, instruction_context, op.operands.operands[1]);
fmt::print(output_file, "\n ");
fmt::print(output_file, "\n");
print_indent();
}
generator.process_binary_op(output_file, op, instruction_context);
@ -677,7 +693,8 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (op.check_nan) {
do_check_nan(output_file, generator, instruction_context, op.input);
fmt::print(output_file, "\n ");
fmt::print(output_file, "\n");
print_indent();
}
generator.process_unary_op(output_file, op, instruction_context);
@ -691,7 +708,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
print_indent();
if (find_conditional_branch_it->second.link) {
if (!print_func_call(instr.getBranchVramGeneric())) {
if (!print_func_call_by_address(instr.getBranchVramGeneric())) {
return false;
}
}
@ -701,6 +718,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
}
}
print_indent();
generator.emit_branch_close(output_file);
is_branch_likely = find_conditional_branch_it->second.likely;
@ -727,23 +745,19 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
// TODO is this used?
if (emit_link_branch) {
fmt::print(output_file, " after_{}:\n", link_branch_index);
print_indent();
generator.emit_label(output_file, fmt::format("after_{}", link_branch_index));
}
return true;
}
bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
template <typename GeneratorType>
bool recompile_function_impl(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
//fmt::print("Recompiling {}\n", func.name);
std::vector<rabbitizer::InstructionCpu> instructions;
fmt::print(output_file,
"RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n"
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
" uint64_t hi = 0, lo = 0, result = 0;\n"
" unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n"
" int c1cs = 0;\n", // cop1 conditional signal
func.name);
generator.emit_function_start(output_file, func.name);
if (context.trace_mode) {
fmt::print(output_file,
@ -808,11 +822,11 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
bool is_branch_likely = false;
// If we're in the delay slot of a likely instruction, emit a goto to skip the instruction before any labels
if (in_likely_delay_slot) {
fmt::print(output_file, " goto skip_{};\n", num_likely_branches);
generator.emit_goto(output_file, fmt::format("skip_{}", num_likely_branches));
}
// If there are any other branch labels to insert and we're at the next one, insert it
if (cur_label != branch_labels.end() && vram >= *cur_label) {
fmt::print(output_file, "L_{:08X}:\n", *cur_label);
generator.emit_label(output_file, fmt::format("L_{:08X}", *cur_label));
++cur_label;
}
@ -822,7 +836,7 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
}
// Process the current instruction and check for errors
if (process_instruction(context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) {
if (process_instruction(generator, context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) {
fmt::print(stderr, "Error in recompiling {}, clearing output file\n", func.name);
output_file.clear();
return false;
@ -833,7 +847,8 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
}
// Now that the instruction has been processed, emit a skip label for the likely branch if needed
if (in_likely_delay_slot) {
fmt::print(output_file, " skip_{}:\n", num_likely_branches);
fmt::print(output_file, " ");
generator.emit_label(output_file, fmt::format("skip_{}", num_likely_branches));
num_likely_branches++;
}
// Mark the next instruction as being in a likely delay slot if the
@ -844,7 +859,13 @@ bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64R
}
// Terminate the function
fmt::print(output_file, ";}}\n");
generator.emit_function_end(output_file);
return true;
}
// Wrap the templated function with CGenerator as the template parameter.
bool N64Recomp::recompile_function(const N64Recomp::Context& context, const N64Recomp::Function& func, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
CGenerator generator{};
return recompile_function_impl(generator, context, func, output_file, static_funcs_out, tag_reference_relocs);
}