#include <vector>
#include <set>
#include <unordered_set>
#include <unordered_map>
#include <cassert>

#include "rabbitizer.hpp"
#include "fmt/format.h"
#include "fmt/ostream.h"

#include "recompiler/context.h"
#include "analysis.h"
#include "recompiler/operations.h"
#include "recompiler/generator.h"

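// Possible outcomes of resolving a jal target (see resolve_jal below): a single known function was found
// (Match), no candidate exists (NoMatch), the target lies in the current section but has no matching symbol
// so a static placeholder function should be created (CreateStatic), multiple candidates exist and the call
// must be resolved at runtime by lookup (Ambiguous), or an internal error occurred (Error).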
enum class JalResolutionResult {
    NoMatch,
    Match,
    CreateStatic,
    Ambiguous,
    Error
};

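// Resolves the function targeted by a jal at the given vram address, as seen from the section containing the
// caller. On a Match, matched_function_index receives the index of the resolved function in context.functions.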
JalResolutionResult resolve_jal(const N64Recomp::Context& context, size_t cur_section_index, uint32_t target_func_vram, size_t& matched_function_index) {
    // Look for symbols with the target vram address
    const N64Recomp::Section& cur_section = context.sections[cur_section_index];
    const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram);
    uint32_t section_vram_start = cur_section.ram_addr;
    uint32_t section_vram_end = cur_section.ram_addr + cur_section.size;
    bool in_current_section = target_func_vram >= section_vram_start && target_func_vram < section_vram_end;
    bool exact_match_found = false;

    // Use a thread local to prevent reallocation across runs and to allow multi-threading in the future.
    thread_local std::vector<size_t> matched_funcs{};
    matched_funcs.clear();

    // Evaluate any functions with the target address to see if they're potential candidates for JAL resolution.
    if (matching_funcs_find != context.functions_by_vram.end()) {
        for (size_t target_func_index : matching_funcs_find->second) {
            const auto& target_func = context.functions[target_func_index];

            // Zero-sized symbol handling. Skip zero-sized symbols unless they fall in the patch address range below.
            if (target_func.words.empty()) {
                // Allow zero-sized symbols between 0x8F000000 and 0x90000000 for use with patches.
                // TODO make this configurable or come up with a more sensible solution for dealing with manual symbols for patches.
                if (target_func.vram < 0x8F000000 || target_func.vram > 0x90000000) {
                    continue;
                }
            }

            // Immediately accept a function in the same section as this one, since it must also be loaded if the current function is.
            if (target_func.section_index == cur_section_index) {
                exact_match_found = true;
                matched_funcs.clear();
                matched_funcs.push_back(target_func_index);
                break;
            }

            // If the function's section isn't relocatable, add the function as a candidate.
            const auto& target_func_section = context.sections[target_func.section_index];
            if (!target_func_section.relocatable) {
                matched_funcs.push_back(target_func_index);
            }
        }
    }

    // If the target vram is in the current section, only allow exact matches.
    if (in_current_section) {
        // If an exact match was found, use it.
        if (exact_match_found) {
            matched_function_index = matched_funcs[0];
            return JalResolutionResult::Match;
        }
        // Otherwise, create a static function at the target address.
        else {
            return JalResolutionResult::CreateStatic;
        }
    }
    // Otherwise, disambiguate based on the matches found.
    else {
        // If there were no matches then JAL resolution has failed.
        // A static can't be created as the target section is unknown.
        if (matched_funcs.size() == 0) {
            return JalResolutionResult::NoMatch;
        }
        // If there was a single match, use it.
        else if (matched_funcs.size() == 1) {
            matched_function_index = matched_funcs[0];
            return JalResolutionResult::Match;
        }
        // If there's more than one match, use an indirect jump to resolve the function at runtime.
        else {
            return JalResolutionResult::Ambiguous;
        }
    }

    // This should never be hit, so return an error.
    return JalResolutionResult::Error;
}

using InstrId = rabbitizer::InstrId::UniqueId;
using Cop0Reg = rabbitizer::Registers::Cpu::Cop0;

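// GPRs other than $zero are stored in the recompiler context and accessed through a "ctx->r" prefix;
// $zero gets an empty prefix since it always reads as zero.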
std::string_view ctx_gpr_prefix(int reg) {
    if (reg != 0) {
        return "ctx->r";
    }
    return "";
}

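// Emits the recompiled code for a single instruction through the given generator. Sets needs_link_branch when
// the instruction is a call that must branch to an "after_" label once it returns, and is_branch_likely when
// it's a branch-likely instruction so the caller can emit the corresponding skip label. Returns false on error.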
template <typename GeneratorType>
bool process_instruction(GeneratorType& generator, const N64Recomp::Context& context, const N64Recomp::Function& func, const N64Recomp::FunctionStats& stats, const std::unordered_set<uint32_t>& jtbl_lw_instructions, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ostream& output_file, bool indent, bool emit_link_branch, int link_branch_index, size_t reloc_index, bool& needs_link_branch, bool& is_branch_likely, bool tag_reference_relocs, std::span<std::vector<uint32_t>> static_funcs_out) {
    using namespace N64Recomp;

    const auto& section = context.sections[func.section_index];
    const auto& instr = instructions[instr_index];
    needs_link_branch = false;
    is_branch_likely = false;
    uint32_t instr_vram = instr.getVram();
    InstrId instr_id = instr.getUniqueId();

    auto print_indent = [&]() {
        fmt::print(output_file, "    ");
    };

    auto hook_find = func.function_hooks.find(instr_index);
    if (hook_find != func.function_hooks.end()) {
        fmt::print(output_file, "    {}\n", hook_find->second);
        if (indent) {
            print_indent();
        }
    }

    // Output a comment with the original instruction
    print_indent();
    if (instr.isBranch() || instr_id == InstrId::cpu_j) {
        generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric()))));
    } else if (instr_id == InstrId::cpu_jal) {
        generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric()))));
    } else {
        generator.emit_comment(fmt::format("0x{:08X}: {}", instr_vram, instr.disassemble(0)));
    }

    // Replace loads for jump table entries with addiu. This leaves the jump table entry's address in the output register
    // instead of the entry's value, which can then be used to determine the offset from the start of the jump table.
    if (jtbl_lw_instructions.contains(instr_vram)) {
        assert(instr_id == InstrId::cpu_lw);
        instr_id = InstrId::cpu_addiu;
    }

    N64Recomp::RelocType reloc_type = N64Recomp::RelocType::R_MIPS_NONE;
    uint32_t reloc_section = 0;
    uint32_t reloc_target_section_offset = 0;
    size_t reloc_reference_symbol = (size_t)-1;

    uint32_t func_vram_end = func.vram + func.words.size() * sizeof(func.words[0]);

    uint16_t imm = instr.Get_immediate();

    // Check if this instruction has a reloc.
    if (section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) {
        // Get the reloc data for this instruction
        const auto& reloc = section.relocs[reloc_index];
        reloc_section = reloc.target_section;

        // Check if the relocation references a relocatable section.
        bool target_relocatable = false;
        if (!reloc.reference_symbol && reloc_section != N64Recomp::SectionAbsolute) {
            const auto& target_section = context.sections[reloc_section];
            target_relocatable = target_section.relocatable;
        }

        // Only process this relocation if the target section is relocatable or if this relocation targets a reference symbol.
        if (target_relocatable || reloc.reference_symbol) {
            // Record the reloc's data.
            reloc_type = reloc.type;
            reloc_target_section_offset = reloc.target_section_offset;
            // Ignore all relocs that aren't MIPS_HI16, MIPS_LO16 or MIPS_26.
            if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16 || reloc_type == N64Recomp::RelocType::R_MIPS_26) {
                if (reloc.reference_symbol) {
                    reloc_reference_symbol = reloc.symbol_index;
                    // Don't try to relocate special section symbols.
                    if (context.is_regular_reference_section(reloc.target_section) || reloc_section == N64Recomp::SectionAbsolute) {
                        bool ref_section_relocatable = context.is_reference_section_relocatable(reloc.target_section);
                        // Resolve HI16 and LO16 reference symbol relocs to non-relocatable sections by patching the instruction immediate.
                        if (!ref_section_relocatable && (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16)) {
                            uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
                            uint32_t full_immediate = reloc.target_section_offset + ref_section_vram;

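                            // The LO16 half gets sign-extended when the pair is applied, so round the HI16
                            // half up by the carry out of bit 15 to compensate.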
                            if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16) {
                                imm = (full_immediate >> 16) + ((full_immediate >> 15) & 1);
                            }
                            else if (reloc_type == N64Recomp::RelocType::R_MIPS_LO16) {
                                imm = full_immediate & 0xFFFF;
                            }

                            // The reloc has been processed, so set it to none to prevent it getting processed a second time during instruction code generation.
                            reloc_type = N64Recomp::RelocType::R_MIPS_NONE;
                            reloc_reference_symbol = (size_t)-1;
                        }
                    }
                }
            }

            // Repoint bss relocations at their non-bss counterpart section.
            auto find_bss_it = context.bss_section_to_section.find(reloc_section);
            if (find_bss_it != context.bss_section_to_section.end()) {
                reloc_section = find_bss_it->second;
            }
        }
    }

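    // Emits the instruction in this branch's delay slot ahead of the branch itself, mirroring MIPS delay slot semantics.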
    auto process_delay_slot = [&](bool use_indent) {
        if (instr_index < instructions.size() - 1) {
            bool dummy_needs_link_branch;
            bool dummy_is_branch_likely;
            size_t next_reloc_index = reloc_index;
            uint32_t next_vram = instr_vram + 4;
            if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) {
                next_reloc_index++;
            }
            if (!process_instruction(generator, context, func, stats, jtbl_lw_instructions, instr_index + 1, instructions, output_file, use_indent, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, tag_reference_relocs, static_funcs_out)) {
                return false;
            }
        }
        return true;
    };

    auto print_link_branch = [&]() {
        if (needs_link_branch) {
            print_indent();
            generator.emit_goto(fmt::format("after_{}", link_branch_index));
        }
    };

    auto print_return_with_delay_slot = [&]() {
        if (!process_delay_slot(false)) {
            return false;
        }
        print_indent();
        generator.emit_return(context);
        print_link_branch();
        return true;
    };

    auto print_goto_with_delay_slot = [&](const std::string& target) {
        if (!process_delay_slot(false)) {
            return false;
        }
        print_indent();
        generator.emit_goto(target);
        print_link_branch();
        return true;
    };

    auto print_func_call_by_register = [&](int reg) {
        if (!process_delay_slot(false)) {
            return false;
        }
        print_indent();
        generator.emit_function_call_by_register(reg);
        print_link_branch();
        return true;
    };

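    // Emits a direct call (jal) to the given vram address, handling event symbols, reference symbols, and
    // regular functions resolved through resolve_jal. tail_call suppresses the link branch after the call.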
    auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &print_link_branch]
        (uint32_t target_func_vram, bool tail_call = false, bool indent = false)
    {
        bool call_by_lookup = false;
        bool call_by_name = false;
        // Event symbol, emit a call to the runtime to trigger this event.
        if (reloc_section == N64Recomp::SectionEvent) {
            needs_link_branch = !tail_call;
            if (indent) {
                print_indent();
            }
            if (!process_delay_slot(false)) {
                return false;
            }
            print_indent();
            generator.emit_trigger_event((uint32_t)reloc_reference_symbol);
            print_link_branch();
        }
        // Normal symbol or reference symbol,
        else {
            std::string jal_target_name{};
            size_t matched_func_index = (size_t)-1;
            if (reloc_reference_symbol != (size_t)-1) {
                if (reloc_type != N64Recomp::RelocType::R_MIPS_26) {
                    fmt::print(stderr, "Unsupported reloc type {} on jal instruction in {}\n", (int)reloc_type, func.name);
                    return false;
                }

                if (!context.skip_validating_reference_symbols) {
                    const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
                    if (ref_symbol.section_offset != reloc_target_section_offset) {
                        fmt::print(stderr, "Function {} uses an R_MIPS_26 addend, which is not supported yet\n", func.name);
                        return false;
                    }
                }
            }
            else {
                JalResolutionResult jal_result = resolve_jal(context, func.section_index, target_func_vram, matched_func_index);

                switch (jal_result) {
                case JalResolutionResult::NoMatch:
                    fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram);
                    return false;
                case JalResolutionResult::Match:
                    jal_target_name = context.functions[matched_func_index].name;
                    break;
                case JalResolutionResult::CreateStatic:
                    // Create a static function and add it to the static function list for this section.
                    jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram);
                    static_funcs_out[func.section_index].push_back(target_func_vram);
                    call_by_name = true;
                    break;
                case JalResolutionResult::Ambiguous:
                    fmt::print(stderr, "[Info] Ambiguous jal target 0x{:08X} in function {}, falling back to function lookup\n", target_func_vram, func.name);
                    // Relocation isn't necessary for jumps inside a relocatable section, as this code path will never run if the target vram
                    // is in the current function's section (see the branch for `in_current_section` above).
                    // If a game ever needs to jump between multiple relocatable sections, relocation will be necessary here.
                    call_by_lookup = true;
                    break;
                case JalResolutionResult::Error:
                    fmt::print(stderr, "Internal error when resolving jal to address 0x{:08X} in function {}. Please report this issue.\n", target_func_vram, func.name);
                    return false;
                }
            }
            needs_link_branch = !tail_call;
            if (indent) {
                print_indent();
            }
            if (!process_delay_slot(false)) {
                return false;
            }
            print_indent();
            if (reloc_reference_symbol != (size_t)-1) {
                generator.emit_function_call_reference_symbol(context, reloc_section, reloc_reference_symbol, reloc_target_section_offset);
            }
            else if (call_by_lookup) {
                generator.emit_function_call_lookup(target_func_vram);
            }
            else if (call_by_name) {
                generator.emit_named_function_call(jal_target_name);
            }
            else {
                generator.emit_function_call(context, matched_func_index);
            }
            print_link_branch();
        }
        return true;
    };

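    // Emits a branch to the given target address. Branches that leave the current function are treated as
    // tail calls when the target is the start of a known function; otherwise a warning is printed and a
    // plain goto is emitted.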
    auto print_branch = [&](uint32_t branch_target) {
        // If the branch target is outside the current function, check if it can be treated as a tail call.
        if (branch_target < func.vram || branch_target >= func_vram_end) {
            // If the branch target is the start of some known function, this can be handled as a tail call.
            // FIXME: how to deal with static functions?
            if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
                fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target);
                if (!print_func_call_by_address(branch_target, true, true)) {
                    return false;
                }
                print_indent();
                generator.emit_return(context);
                // TODO check if this branch close should exist.
                // print_indent();
                // generator.emit_branch_close();
                return true;
            }

            fmt::print(stderr, "[Warn] Function {} is branching outside of the function (to 0x{:08X})\n", func.name, branch_target);
        }

        if (!process_delay_slot(true)) {
            return false;
        }

        print_indent();
        print_indent();
        generator.emit_goto(fmt::format("L_{:08X}", branch_target));
        // TODO check if this link branch ever exists.
        if (needs_link_branch) {
            print_indent();
            print_indent();
            generator.emit_goto(fmt::format("after_{}", link_branch_index));
        }
        return true;
    };

    if (indent) {
        print_indent();
    }

    int rd = (int)instr.GetO32_rd();
    int rs = (int)instr.GetO32_rs();
    int rt = (int)instr.GetO32_rt();
    int sa = (int)instr.Get_sa();

    int fd = (int)instr.GetO32_fd();
    int fs = (int)instr.GetO32_fs();
    int ft = (int)instr.GetO32_ft();

    int cop1_cs = (int)instr.Get_cop1cs();

    bool handled = true;

    switch (instr_id) {
    case InstrId::cpu_nop:
        fmt::print(output_file, "\n");
        break;
    // Cop0 (Limited functionality)
    case InstrId::cpu_mfc0:
        {
            Cop0Reg reg = instr.Get_cop0d();
            switch (reg) {
            case Cop0Reg::COP0_Status:
                print_indent();
                generator.emit_cop0_status_read(rt);
                break;
            default:
                fmt::print(stderr, "Unhandled cop0 register in mfc0: {}\n", (int)reg);
                return false;
            }
            break;
        }
    case InstrId::cpu_mtc0:
        {
            Cop0Reg reg = instr.Get_cop0d();
            switch (reg) {
            case Cop0Reg::COP0_Status:
                print_indent();
                generator.emit_cop0_status_write(rt);
                break;
            default:
                fmt::print(stderr, "Unhandled cop0 register in mtc0: {}\n", (int)reg);
                return false;
            }
            break;
        }
    // Arithmetic
    case InstrId::cpu_add:
    case InstrId::cpu_addu:
        {
            // Check if this addu belongs to a jump table load
            auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
                [instr_vram](const N64Recomp::JumpTable& jtbl) {
                    return jtbl.addu_vram == instr_vram;
                });
            // If so, create a temp to preserve the addend register's value
            if (find_result != stats.jump_tables.end()) {
                const N64Recomp::JumpTable& cur_jtbl = *find_result;
                print_indent();
                generator.emit_jtbl_addend_declaration(cur_jtbl, cur_jtbl.addend_reg);
            }
        }
        break;
    case InstrId::cpu_mult:
    case InstrId::cpu_dmult:
    case InstrId::cpu_multu:
    case InstrId::cpu_dmultu:
    case InstrId::cpu_div:
    case InstrId::cpu_ddiv:
    case InstrId::cpu_divu:
    case InstrId::cpu_ddivu:
        print_indent();
        generator.emit_muldiv(instr_id, rs, rt);
        break;
    // Branches
    case InstrId::cpu_jal:
        if (!print_func_call_by_address(instr.getBranchVramGeneric())) {
            return false;
        }
        break;
    case InstrId::cpu_jalr:
        // jalr can only be handled with $ra as the return address register
        if (rd != (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
            fmt::print(stderr, "Invalid return address reg for jalr: {}\n", rd);
            return false;
        }
        needs_link_branch = true;
        print_func_call_by_register(rs);
        break;
    case InstrId::cpu_j:
    case InstrId::cpu_b:
        {
            uint32_t branch_target = instr.getBranchVramGeneric();
            if (branch_target == instr_vram) {
                print_indent();
                generator.emit_pause_self();
            }
            // Check if the branch is within this function
            else if (branch_target >= func.vram && branch_target < func_vram_end) {
                print_goto_with_delay_slot(fmt::format("L_{:08X}", branch_target));
            }
            // This may be a tail call in the middle of the control flow due to a previous check
            // For example:
            // ```c
            // void test() {
            //     if (SOME_CONDITION) {
            //         do_a();
            //     } else {
            //         do_b();
            //     }
            // }
            // ```
            // FIXME: how to deal with static functions?
            else if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
                fmt::print("[Info] Tail call in {} to 0x{:08X}\n", func.name, branch_target);
                if (!print_func_call_by_address(branch_target, true)) {
                    return false;
                }
                print_indent();
                generator.emit_return(context);
            }
            else {
                fmt::print(stderr, "Unhandled branch in {} at 0x{:08X} to 0x{:08X}\n", func.name, instr_vram, branch_target);
                return false;
            }
        }
        break;
    case InstrId::cpu_jr:
        if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
            print_return_with_delay_slot();
        } else {
            auto jtbl_find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
                [instr_vram](const N64Recomp::JumpTable& jtbl) {
                    return jtbl.jr_vram == instr_vram;
                });

            if (jtbl_find_result != stats.jump_tables.end()) {
                const N64Recomp::JumpTable& cur_jtbl = *jtbl_find_result;
                if (!process_delay_slot(false)) {
                    return false;
                }
                print_indent();
                generator.emit_switch(context, cur_jtbl, rs);
                for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
                    print_indent();
                    print_indent();
                    generator.emit_case(entry_index, fmt::format("L_{:08X}", cur_jtbl.entries[entry_index]));
                }
                print_indent();
                print_indent();
                generator.emit_switch_error(instr_vram, cur_jtbl.vram);
                print_indent();
                generator.emit_switch_close();
                break;
            }

            fmt::print("[Info] Indirect tail call in {}\n", func.name);
            print_func_call_by_register(rs);
            print_indent();
            generator.emit_return(context);
            break;
        }
        break;
    case InstrId::cpu_syscall:
        print_indent();
        generator.emit_syscall(instr_vram);
        // syscalls don't link, so treat it like a tail call
        print_indent();
        generator.emit_return(context);
        break;
    case InstrId::cpu_break:
        print_indent();
        generator.emit_do_break(instr_vram);
        break;

    // Cop1 rounding mode
    case InstrId::cpu_ctc1:
        if (cop1_cs != 31) {
            fmt::print(stderr, "Invalid FP control register for ctc1: {}\n", cop1_cs);
            return false;
        }
        print_indent();
        generator.emit_cop1_cs_write(rt);
        break;
    case InstrId::cpu_cfc1:
        if (cop1_cs != 31) {
            fmt::print(stderr, "Invalid FP control register for cfc1: {}\n", cop1_cs);
            return false;
        }
        print_indent();
        generator.emit_cop1_cs_read(rt);
        break;
    default:
        handled = false;
        break;
    }

    InstructionContext instruction_context{};
    instruction_context.rd = rd;
    instruction_context.rs = rs;
    instruction_context.rt = rt;
    instruction_context.sa = sa;
    instruction_context.fd = fd;
    instruction_context.fs = fs;
    instruction_context.ft = ft;
    instruction_context.cop1_cs = cop1_cs;
    instruction_context.imm16 = imm;
    instruction_context.reloc_tag_as_reference = (reloc_reference_symbol != (size_t)-1) && tag_reference_relocs;
    instruction_context.reloc_type = reloc_type;
    instruction_context.reloc_section_index = reloc_section;
    instruction_context.reloc_target_section_offset = reloc_target_section_offset;

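    // Emits the generator's MIPS3 float-mode (FR) check for the given operand if it names a floating-point register.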
    auto do_check_fr = [](const GeneratorType& generator, const InstructionContext& ctx, Operand operand) {
        switch (operand) {
        case Operand::Fd:
        case Operand::FdDouble:
        case Operand::FdU32L:
        case Operand::FdU32H:
        case Operand::FdU64:
            generator.emit_check_fr(ctx.fd);
            break;
        case Operand::Fs:
        case Operand::FsDouble:
        case Operand::FsU32L:
        case Operand::FsU32H:
        case Operand::FsU64:
            generator.emit_check_fr(ctx.fs);
            break;
        case Operand::Ft:
        case Operand::FtDouble:
        case Operand::FtU32L:
        case Operand::FtU32H:
        case Operand::FtU64:
            generator.emit_check_fr(ctx.ft);
            break;
        default:
            // No MIPS3 float check needed for non-float operands.
            break;
        }
    };

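    // Emits a NaN check for floating-point operands; the second argument to emit_check_nan selects double precision.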
    auto do_check_nan = [](const GeneratorType& generator, const InstructionContext& ctx, Operand operand) {
        switch (operand) {
        case Operand::Fd:
            generator.emit_check_nan(ctx.fd, false);
            break;
        case Operand::Fs:
            generator.emit_check_nan(ctx.fs, false);
            break;
        case Operand::Ft:
            generator.emit_check_nan(ctx.ft, false);
            break;
        case Operand::FdDouble:
            generator.emit_check_nan(ctx.fd, true);
            break;
        case Operand::FsDouble:
            generator.emit_check_nan(ctx.fs, true);
            break;
        case Operand::FtDouble:
            generator.emit_check_nan(ctx.ft, true);
            break;
        default:
            // No NaN checks needed for non-float operands.
            break;
        }
    };

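    // Instructions not handled by the switch above are table-driven: look the opcode up in the binary, unary,
    // conditional branch, and store op tables and pass the matched descriptor to the generator.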
    auto find_binary_it = binary_ops.find(instr_id);
    if (find_binary_it != binary_ops.end()) {
        print_indent();
        const BinaryOp& op = find_binary_it->second;

        if (op.check_fr) {
            do_check_fr(generator, instruction_context, op.output);
            do_check_fr(generator, instruction_context, op.operands.operands[0]);
            do_check_fr(generator, instruction_context, op.operands.operands[1]);
        }

        if (op.check_nan) {
            do_check_nan(generator, instruction_context, op.operands.operands[0]);
            do_check_nan(generator, instruction_context, op.operands.operands[1]);
            fmt::print(output_file, "\n");
            print_indent();
        }

        generator.process_binary_op(op, instruction_context);
        handled = true;
    }

    auto find_unary_it = unary_ops.find(instr_id);
    if (find_unary_it != unary_ops.end()) {
        print_indent();
        const UnaryOp& op = find_unary_it->second;

        if (op.check_fr) {
            do_check_fr(generator, instruction_context, op.output);
            do_check_fr(generator, instruction_context, op.input);
        }

        if (op.check_nan) {
            do_check_nan(generator, instruction_context, op.input);
            fmt::print(output_file, "\n");
            print_indent();
        }

        generator.process_unary_op(op, instruction_context);
        handled = true;
    }

    auto find_conditional_branch_it = conditional_branch_ops.find(instr_id);
    if (find_conditional_branch_it != conditional_branch_ops.end()) {
        print_indent();
        // TODO combining the branch condition and branch target into one generator call would allow better optimization in the runtime's JIT generator.
        // This would require splitting into a conditional jump method and conditional function call method.
        generator.emit_branch_condition(find_conditional_branch_it->second, instruction_context);

        print_indent();
        if (find_conditional_branch_it->second.link) {
            if (!print_func_call_by_address(instr.getBranchVramGeneric())) {
                return false;
            }
        }
        else {
            if (!print_branch((uint32_t)instr.getBranchVramGeneric())) {
                return false;
            }
        }

        print_indent();
        generator.emit_branch_close();

        is_branch_likely = find_conditional_branch_it->second.likely;
        handled = true;
    }

    auto find_store_it = store_ops.find(instr_id);
    if (find_store_it != store_ops.end()) {
        print_indent();
        const StoreOp& op = find_store_it->second;

        if (op.type == StoreOpType::SDC1) {
            do_check_fr(generator, instruction_context, op.value_input);
        }

        generator.process_store_op(op, instruction_context);
        handled = true;
    }

    if (!handled) {
        fmt::print(stderr, "Unhandled instruction: {}\n", instr.getOpcodeName());
        return false;
    }

    // TODO is this used?
    if (emit_link_branch) {
        print_indent();
        generator.emit_label(fmt::format("after_{}", link_branch_index));
    }

    return true;
}

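// Recompiles a single function: disassembles its words, analyzes it for jump tables, then emits code for
// every instruction through the given generator, inserting branch labels and delay slot handling as needed.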
template <typename GeneratorType>
bool recompile_function_impl(GeneratorType& generator, const N64Recomp::Context& context, size_t func_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
    const N64Recomp::Function& func = context.functions[func_index];
    //fmt::print("Recompiling {}\n", func.name);
    std::vector<rabbitizer::InstructionCpu> instructions;

    generator.emit_function_start(func.name, func_index);

    if (context.trace_mode) {
        fmt::print(output_file,
            "    TRACE_ENTRY()\n",
            func.name);
    }

    // Skip analysis and recompilation if this function is stubbed.
    if (!func.stubbed) {
        // Use a set to sort and deduplicate labels
        std::set<uint32_t> branch_labels;
        instructions.reserve(func.words.size());

        auto hook_find = func.function_hooks.find(-1);
        if (hook_find != func.function_hooks.end()) {
            fmt::print(output_file, "    {}\n", hook_find->second);
        }

        // First pass, disassemble each instruction and collect branch labels
        uint32_t vram = func.vram;
        for (uint32_t word : func.words) {
            const auto& instr = instructions.emplace_back(byteswap(word), vram);

            // If this is a branch or a direct jump, add it to the local label list
            if (instr.isBranch() || instr.getUniqueId() == rabbitizer::InstrId::UniqueId::cpu_j) {
                branch_labels.insert((uint32_t)instr.getBranchVramGeneric());
            }

            // Advance the vram address by the size of one instruction
            vram += 4;
        }

        // Analyze function
        N64Recomp::FunctionStats stats{};
        if (!N64Recomp::analyze_function(context, func, instructions, stats)) {
            fmt::print(stderr, "Failed to analyze {}\n", func.name);
            output_file.clear();
            return false;
        }

        std::unordered_set<uint32_t> jtbl_lw_instructions{};

        // Add jump table labels into function
        for (const auto& jtbl : stats.jump_tables) {
            jtbl_lw_instructions.insert(jtbl.lw_vram);
            for (uint32_t jtbl_entry : jtbl.entries) {
                branch_labels.insert(jtbl_entry);
            }
        }

        // Second pass, emit code for each instruction and emit labels
        auto cur_label = branch_labels.cbegin();
        vram = func.vram;
        int num_link_branches = 0;
        int num_likely_branches = 0;
        bool needs_link_branch = false;
        bool in_likely_delay_slot = false;
        const auto& section = context.sections[func.section_index];
        size_t reloc_index = 0;
        for (size_t instr_index = 0; instr_index < instructions.size(); ++instr_index) {
            bool had_link_branch = needs_link_branch;
            bool is_branch_likely = false;
            // If we're in the delay slot of a likely instruction, emit a goto to skip the instruction before any labels
            if (in_likely_delay_slot) {
                generator.emit_goto(fmt::format("skip_{}", num_likely_branches));
            }
            // If there are any other branch labels to insert and we're at the next one, insert it
            if (cur_label != branch_labels.end() && vram >= *cur_label) {
                generator.emit_label(fmt::format("L_{:08X}", *cur_label));
                ++cur_label;
            }

            // Advance the reloc index until we reach the last one or until we get to/pass the current instruction
            while ((reloc_index + 1) < section.relocs.size() && section.relocs[reloc_index].address < vram) {
                reloc_index++;
            }

            // Process the current instruction and check for errors
            if (process_instruction(generator, context, func, stats, jtbl_lw_instructions, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, reloc_index, needs_link_branch, is_branch_likely, tag_reference_relocs, static_funcs_out) == false) {
                fmt::print(stderr, "Error in recompiling {}, clearing output file\n", func.name);
                output_file.clear();
                return false;
            }
            // If a link return branch was generated, advance the number of link return branches
            if (had_link_branch) {
                num_link_branches++;
            }
            // Now that the instruction has been processed, emit a skip label for the likely branch if needed
            if (in_likely_delay_slot) {
                fmt::print(output_file, "    ");
                generator.emit_label(fmt::format("skip_{}", num_likely_branches));
                num_likely_branches++;
            }
            // Mark the next instruction as being in a likely delay slot if the current instruction is a branch likely.
            in_likely_delay_slot = is_branch_likely;
            // Advance the vram address by the size of one instruction
            vram += 4;
        }
    }

    // Terminate the function
    generator.emit_function_end();

    return true;
}

// Wrap the templated function with CGenerator as the template parameter.
bool N64Recomp::recompile_function(const N64Recomp::Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
    CGenerator generator{output_file};
    return recompile_function_impl(generator, context, function_index, output_file, static_funcs_out, tag_reference_relocs);
}

bool N64Recomp::recompile_function_custom(Generator& generator, const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs) {
    return recompile_function_impl(generator, context, function_index, output_file, static_funcs_out, tag_reference_relocs);
}
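
// A minimal usage sketch, not code from this repository; it assumes a populated N64Recomp::Context `ctx`
// and an open std::ofstream `out`:
//
//     std::vector<std::vector<uint32_t>> static_funcs(ctx.sections.size());
//     for (size_t i = 0; i < ctx.functions.size(); i++) {
//         if (!N64Recomp::recompile_function(ctx, i, out, static_funcs, /*tag_reference_relocs=*/false)) {
//             // handle the error
//         }
//     }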