(Live recompiler) Add dedicated float compare operations, fix switch case handling with multiple functions, add reference symbol handling, other misc fixes

This commit is contained in:
Mr-Wiseguy 2024-12-29 17:44:44 -05:00
parent 3672805121
commit 75f2abdb7d
8 changed files with 275 additions and 100 deletions

View file

@ -11,6 +11,8 @@
#include "sljitLir.h"
static_assert(sizeof(void*) >= sizeof(sljit_uw), "`void*` must be able to hold a `sljit_uw` value for rewritable jumps!");
constexpr uint64_t rdram_offset = 0xFFFFFFFF80000000ULL;
void N64Recomp::live_recompiler_init() {
@ -31,7 +33,6 @@ namespace Registers {
constexpr int arithmetic_temp2 = SLJIT_R1;
constexpr int arithmetic_temp3 = SLJIT_R2;
constexpr int arithmetic_temp4 = SLJIT_R3;
constexpr int float_temp = SLJIT_FR0;
}
struct InnerCall {
@ -40,7 +41,8 @@ struct InnerCall {
};
struct ReferenceSymbolCall {
uint16_t reference;
N64Recomp::SymbolReference reference;
sljit_jump* jump;
};
struct SwitchErrorJump {
@ -56,8 +58,14 @@ struct N64Recomp::LiveGeneratorContext {
std::vector<sljit_label*> func_labels;
std::vector<InnerCall> inner_calls;
std::vector<std::vector<std::string>> switch_jump_labels;
// See LiveGeneratorOutput::jump_tables for info.
std::vector<void**> jump_tables;
// See LiveGeneratorOutput::jump_tables for info. Contains sljit labels so they can be linked after recompilation.
std::vector<std::pair<std::vector<sljit_label*>, std::unique_ptr<void*[]>>> unlinked_jump_tables;
// Jump tables for the current function being recompiled.
std::vector<std::unique_ptr<void*[]>> pending_jump_tables;
// See LiveGeneratorOutput::reference_symbol_jumps for info.
std::vector<std::pair<ReferenceJumpDetails, sljit_jump*>> reference_symbol_jumps;
// See LiveGeneratorOutput::import_jumps_by_index for info.
std::unordered_multimap<size_t, sljit_jump*> import_jumps_by_index;
std::vector<SwitchErrorJump> switch_error_jumps;
sljit_jump* cur_branch_jump;
};
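
The new fields stage switch data across three phases: emit_switch() allocates a table and parks it in pending_jump_tables, emit_function_end() pairs it with the resolved labels in unlinked_jump_tables, and finish() writes the final addresses and hands ownership to LiveGeneratorOutput::jump_tables. A minimal sketch of that hand-off with simplified stand-in types (FakeLabel is illustrative, not sljit's type):

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

struct FakeLabel { void* addr; }; // stand-in for sljit_label

int main() {
    std::vector<std::unique_ptr<void*[]>> pending_jump_tables;
    std::vector<std::pair<std::vector<FakeLabel*>, std::unique_ptr<void*[]>>> unlinked_jump_tables;
    std::vector<std::unique_ptr<void*[]>> output_jump_tables;

    // emit_switch: allocate a table whose address is baked into the generated code.
    pending_jump_tables.emplace_back(std::make_unique<void*[]>(4));

    // emit_function_end: pair the table with the labels of its resolved cases.
    std::vector<FakeLabel*> labels(4, nullptr);
    unlinked_jump_tables.emplace_back(std::move(labels), std::move(pending_jump_tables.back()));
    pending_jump_tables.pop_back();

    // finish: fill in the label addresses, then transfer ownership to the output.
    for (auto& [lbls, table] : unlinked_jump_tables) {
        for (size_t i = 0; i < lbls.size(); i++)
            table[i] = lbls[i] ? lbls[i]->addr : nullptr;
        output_jump_tables.emplace_back(std::move(table));
    }
}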
@ -78,6 +86,11 @@ N64Recomp::LiveGenerator::~LiveGenerator() {
N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
LiveGeneratorOutput ret{};
if (errored) {
ret.good = false;
return ret;
}
ret.good = true;
// Populate all the pending inner function calls.
@ -147,34 +160,37 @@ N64Recomp::LiveGeneratorOutput N64Recomp::LiveGenerator::finish() {
ret.functions[func_index] = reinterpret_cast<recomp_func_t*>(sljit_get_label_addr(func_label));
}
}
context->func_labels.clear();
// Populate all the switch case addresses.
bool invalid_switch = false;
for (size_t switch_index = 0; switch_index < context->switch_jump_labels.size(); switch_index++) {
const std::vector<std::string>& cur_labels = context->switch_jump_labels[switch_index];
void** cur_jump_table = context->jump_tables[switch_index];
for (size_t case_index = 0; case_index < cur_labels.size(); case_index++) {
// Find the label.
auto find_it = context->labels.find(cur_labels[case_index]);
if (find_it == context->labels.end()) {
// Label not found, invalid switch.
// Don't return immediately, as we need to ensure that all the jump tables end up in ret
// so that it cleans them up in its destructor.
invalid_switch = true;
break;
}
// Get the reference symbol jump instruction addresses.
ret.reference_symbol_jumps.resize(context->reference_symbol_jumps.size());
for (size_t jump_index = 0; jump_index < context->reference_symbol_jumps.size(); jump_index++) {
ReferenceJumpDetails& details = context->reference_symbol_jumps[jump_index].first;
sljit_jump* jump = context->reference_symbol_jumps[jump_index].second;
// Get the label's address and place it in the jump table.
cur_jump_table[case_index] = reinterpret_cast<void*>(sljit_get_label_addr(find_it->second));
ret.reference_symbol_jumps[jump_index].first = details;
ret.reference_symbol_jumps[jump_index].second = reinterpret_cast<void*>(jump->addr);
}
ret.jump_tables.emplace_back(cur_jump_table);
}
context->switch_jump_labels.clear();
context->jump_tables.clear();
context->reference_symbol_jumps.clear();
if (invalid_switch) {
return { };
// Get the import jump instruction addresses.
ret.import_jumps_by_index.reserve(context->import_jumps_by_index.size());
for (auto& [jump_index, jump] : context->import_jumps_by_index) {
ret.import_jumps_by_index.emplace(jump_index, reinterpret_cast<void*>(jump->addr));
}
context->import_jumps_by_index.clear();
// Populate label addresses for the jump tables and place them in the output.
for (auto& [labels, jump_table] : context->unlinked_jump_tables) {
for (size_t entry_index = 0; entry_index < labels.size(); entry_index++) {
sljit_label* cur_label = labels[entry_index];
jump_table[entry_index] = reinterpret_cast<void*>(sljit_get_label_addr(cur_label));
}
ret.jump_tables.emplace_back(std::move(jump_table));
}
context->unlinked_jump_tables.clear();
ret.executable_offset = sljit_get_executable_offset(compiler);
sljit_free_compiler(compiler);
compiler = nullptr;
@ -188,16 +204,26 @@ N64Recomp::LiveGeneratorOutput::~LiveGeneratorOutput() {
sljit_free_code(code, nullptr);
code = nullptr;
}
for (const char* literal : string_literals) {
delete[] literal;
}
string_literals.clear();
for (void** jump_table : jump_tables) {
delete[] jump_table;
size_t N64Recomp::LiveGeneratorOutput::num_reference_symbol_jumps() const {
return reference_symbol_jumps.size();
}
void N64Recomp::LiveGeneratorOutput::set_reference_symbol_jump(size_t jump_index, recomp_func_t* func) {
const auto& jump_entry = reference_symbol_jumps[jump_index];
sljit_set_jump_addr(reinterpret_cast<sljit_uw>(jump_entry.second), reinterpret_cast<sljit_uw>(func), executable_offset);
}
N64Recomp::ReferenceJumpDetails N64Recomp::LiveGeneratorOutput::get_reference_symbol_jump_details(size_t jump_index) {
return reference_symbol_jumps[jump_index].first;
}
void N64Recomp::LiveGeneratorOutput::populate_import_symbol_jumps(size_t import_index, recomp_func_t* func) {
auto find_range = import_jumps_by_index.equal_range(import_index);
for (auto it = find_range.first; it != find_range.second; ++it) {
sljit_set_jump_addr(reinterpret_cast<sljit_uw>(it->second), reinterpret_cast<sljit_uw>(func), executable_offset);
}
jump_tables.clear();
}
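
The three accessors above form the runtime patching interface. A hedged sketch of a resolver loop that could drive them once dependencies are known; lookup_func_by_section_offset is a hypothetical placeholder, not a real N64Recomp API:

// Hypothetical lookup, declared only for illustration.
recomp_func_t* lookup_func_by_section_offset(uint16_t section, uint32_t section_offset);

void resolve_reference_jumps(N64Recomp::LiveGeneratorOutput& output) {
    for (size_t i = 0; i < output.num_reference_symbol_jumps(); i++) {
        N64Recomp::ReferenceJumpDetails details = output.get_reference_symbol_jump_details(i);
        recomp_func_t* func = lookup_func_by_section_offset(details.section, details.section_offset);
        // Rewrites the recorded call instruction in place to target func.
        output.set_reference_symbol_jump(i, func);
    }
}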
constexpr int get_gpr_context_offset(int gpr_index) {
@ -241,7 +267,6 @@ void get_gpr_values(int gpr, sljit_sw& out, sljit_sw& outw) {
bool get_operand_values(N64Recomp::Operand operand, const N64Recomp::InstructionContext& context, sljit_sw& out, sljit_sw& outw) {
using namespace N64Recomp;
bool relocation_valid = false;
switch (operand) {
case Operand::Rd:
@ -438,6 +463,8 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
}
if (op.operands.operand_operations[1] != UnaryOpType::None &&
op.operands.operand_operations[1] != UnaryOpType::ToU64 &&
op.operands.operand_operations[1] != UnaryOpType::ToS64 &&
op.operands.operand_operations[1] != UnaryOpType::Mask5 && // Only for 32-bit shifts
op.operands.operand_operations[1] != UnaryOpType::Mask6) // Only for 64-bit shifts
{
@ -455,7 +482,7 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
sljit_emit_op1(this->compiler, SLJIT_MOV_P, dst, dstw, Registers::arithmetic_temp1, 0);
};
auto do_op32 = [dst, dstw, src1, src1w, src2, src2w, this, &sign_extend_and_store](sljit_s32 op) {
auto do_op32 = [src1, src1w, src2, src2w, this, &sign_extend_and_store](sljit_s32 op) {
sljit_emit_op2(this->compiler, op, Registers::arithmetic_temp1, 0, src1, src1w, src2, src2w);
sign_extend_and_store();
};
@ -468,7 +495,7 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
sljit_emit_fop2(this->compiler, op, dst, dstw, src1, src1w, src2, src2w);
};
auto do_load_op = [dst, dstw, src1, src1w, src2, src2w, &ctx, this](sljit_s32 op, int address_xor) {
auto do_load_op = [dst, dstw, src1, src1w, src2, src2w, this](sljit_s32 op, int address_xor) {
// TODO 0 immediate optimization.
// Add the base and immediate into the arithmetic temp.
@ -486,7 +513,7 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, Registers::arithmetic_temp1, 0);
};
auto do_compare_op = [cmp_unsigned, dst, dstw, src1, src1w, src2, src2w, &ctx, this](sljit_s32 op_unsigned, sljit_s32 op_signed) {
auto do_compare_op = [cmp_unsigned, dst, dstw, src1, src1w, src2, src2w, this](sljit_s32 op_unsigned, sljit_s32 op_signed) {
// Pick the operation based on the signedness of the comparison.
sljit_s32 op = cmp_unsigned ? op_unsigned : op_signed;
@ -506,6 +533,18 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
sljit_emit_op_flags(compiler, SLJIT_MOV, dst, dstw, op);
};
auto do_float_compare_op = [dst, dstw, src1, src1w, src2, src2w, this](sljit_s32 flag_op, sljit_s32 set_op, bool double_precision) {
// Pick the operation based on the precision of the comparison.
sljit_s32 compare_op = set_op | (double_precision ? SLJIT_CMP_F64 : SLJIT_CMP_F32);
// Perform the comparison with the determined operation.
// Float comparisons use fop1, with the left-hand operand passed in the dst slot.
sljit_emit_fop1(compiler, compare_op, src1, src1w, src2, src2w);
// Move the operation's flag into the destination.
sljit_emit_op_flags(compiler, SLJIT_MOV, dst, dstw, flag_op);
};
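
In plain C++ terms, the two emitted instructions compute a 0/1 result: the fop1 compare sets the requested flag and op_flags materializes it in the destination. A semantics sketch for BinaryOpType::LessF32 (what the generated code computes, not generator code):

#include <cstdint>

uint64_t less_f32(float fs, float ft) {
    return (fs < ft) ? 1 : 0; // ends up in the Cop1cs condition field
}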
auto do_unaligned_load_op = [dst, dstw, src1, src1w, src2, src2w, this](bool left, bool doubleword) {
// TODO 0 immediate optimization.
@ -691,6 +730,24 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
case BinaryOpType::GreaterEq:
do_compare_op(SLJIT_GREATER_EQUAL, SLJIT_SIG_GREATER_EQUAL);
break;
case BinaryOpType::EqualF32:
do_float_compare_op(SLJIT_F_EQUAL, SLJIT_SET_F_EQUAL, false);
break;
case BinaryOpType::LessF32:
do_float_compare_op(SLJIT_F_LESS, SLJIT_SET_F_LESS, false);
break;
case BinaryOpType::LessEqF32:
do_float_compare_op(SLJIT_F_LESS_EQUAL, SLJIT_SET_F_LESS_EQUAL, false);
break;
case BinaryOpType::EqualF64:
do_float_compare_op(SLJIT_F_EQUAL, SLJIT_SET_F_EQUAL, true);
break;
case BinaryOpType::LessF64:
do_float_compare_op(SLJIT_F_LESS, SLJIT_SET_F_LESS, true);
break;
case BinaryOpType::LessEqF64:
do_float_compare_op(SLJIT_F_LESS_EQUAL, SLJIT_SET_F_LESS_EQUAL, true);
break;
// Loads
case BinaryOpType::LD:
@ -792,13 +849,13 @@ void N64Recomp::LiveGenerator::load_relocated_address(const InstructionContext&
// Get the pointer to the section address.
int32_t* section_addr_ptr = (ctx.reloc_tag_as_reference ? inputs.reference_section_addresses : inputs.local_section_addresses) + ctx.reloc_section_index;
// Load the section's address into R0.
sljit_emit_op1(compiler, SLJIT_MOV_S32, Registers::arithmetic_temp1, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr));
// Load the section's address into the target register.
sljit_emit_op1(compiler, SLJIT_MOV_S32, reg, 0, SLJIT_MEM0(), sljit_sw(section_addr_ptr));
// Don't emit the add if the offset is zero (small optimization).
if (ctx.reloc_target_section_offset != 0) {
// Add the reloc section offset to the section's address and put the result in R0.
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, SLJIT_IMM, ctx.reloc_target_section_offset);
sljit_emit_op2(compiler, SLJIT_ADD, reg, 0, reg, 0, SLJIT_IMM, ctx.reloc_target_section_offset);
}
}
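
Semantically the change only generalizes the destination register; the emitted code still computes section base plus offset. A sketch of that computation (parameter names mirror the context fields, the function itself is illustrative):

#include <cstdint>

uint32_t relocated_address(const int32_t* section_addresses, // local or reference table
                           uint16_t reloc_section_index,
                           uint32_t reloc_target_section_offset) {
    uint32_t addr = uint32_t(section_addresses[reloc_section_index]);
    if (reloc_target_section_offset != 0) { // the add is skipped when the offset is zero
        addr += reloc_target_section_offset;
    }
    return addr;
}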
@ -853,7 +910,6 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
sljit_s32 jit_op = SLJIT_BREAKPOINT;
bool failed = false;
bool float_op = false;
bool func_float_op = false;
@ -1200,12 +1256,52 @@ void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const Instruc
void N64Recomp::LiveGenerator::emit_function_start(const std::string& function_name, size_t func_index) const {
context->function_name = function_name;
context->func_labels[func_index] = sljit_emit_label(compiler);
// sljit_emit_op0(compiler, SLJIT_BREAKPOINT);
sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_FLOAT(1), 5 | SLJIT_ENTER_FLOAT(0), 0);
sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
}
void N64Recomp::LiveGenerator::emit_function_end() const {
// Nothing to do here.
// Check that all jumps have been paired to a label.
if (!context->pending_jumps.empty()) {
assert(false);
errored = true;
}
// Populate the labels for pending switches and move them into the unlinked jump tables.
bool invalid_switch = false;
for (size_t switch_index = 0; switch_index < context->switch_jump_labels.size(); switch_index++) {
const std::vector<std::string>& cur_labels = context->switch_jump_labels[switch_index];
std::vector<sljit_label*> cur_label_addrs{};
cur_label_addrs.resize(cur_labels.size());
for (size_t case_index = 0; case_index < cur_labels.size(); case_index++) {
// Find the label.
auto find_it = context->labels.find(cur_labels[case_index]);
if (find_it == context->labels.end()) {
// Label not found, invalid switch.
// Track this in a variable instead of returning immediately so that the pending labels are still cleared.
invalid_switch = true;
break;
}
cur_label_addrs[case_index] = find_it->second;
}
context->unlinked_jump_tables.emplace_back(
std::make_pair<std::vector<sljit_label*>, std::unique_ptr<void*[]>>(
std::move(cur_label_addrs),
std::move(context->pending_jump_tables[switch_index])
)
);
}
context->switch_jump_labels.clear();
context->pending_jump_tables.clear();
// Clear the labels so that a label in one function can't be jumped to from another.
context->labels.clear();
if (invalid_switch) {
assert(false);
errored = true;
}
}
void N64Recomp::LiveGenerator::emit_function_call_lookup(uint32_t addr) const {
@ -1244,13 +1340,33 @@ void N64Recomp::LiveGenerator::emit_function_call_by_register(int reg) const {
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P, P), SLJIT_R2, 0);
}
void N64Recomp::LiveGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const {
const N64Recomp::ReferenceSymbol& sym = context.get_reference_symbol(section_index, symbol_index);
assert(false);
errored = true;
void N64Recomp::LiveGenerator::emit_function_call_reference_symbol(const Context&, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const {
(void)symbol_index;
// Load rdram and ctx into R0 and R1.
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
// sljit_emit_op0(compiler, SLJIT_BREAKPOINT);
// Call the function and save the jump to set its label later on.
sljit_jump* call_jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP, SLJIT_ARGS2V(P, P));
// Set a dummy jump target; it will get replaced when the reference/import symbol jumps are populated.
if (section_index == N64Recomp::SectionImport) {
sljit_set_target(call_jump, sljit_uw(-1));
context->import_jumps_by_index.emplace(symbol_index, call_jump);
}
else {
sljit_set_target(call_jump, sljit_uw(-2));
context->reference_symbol_jumps.emplace_back(std::make_pair(
ReferenceJumpDetails{
.section = section_index,
.section_offset = target_section_offset
},
call_jump
));
}
}
void N64Recomp::LiveGenerator::emit_function_call(const Context& recompiler_context, size_t function_index) const {
void N64Recomp::LiveGenerator::emit_function_call(const Context&, size_t function_index) const {
// Load rdram and ctx into R0 and R1.
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, Registers::ctx, 0);
@ -1290,6 +1406,8 @@ void N64Recomp::LiveGenerator::emit_label(const std::string& label_name) const {
}
void N64Recomp::LiveGenerator::emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const {
(void)jtbl;
(void)reg;
// Nothing to do here, the live recompiler performs a subtraction to get the switch's case.
}
@ -1403,9 +1521,8 @@ void N64Recomp::LiveGenerator::emit_switch(const JumpTable& jtbl, int reg) const
}
context->switch_jump_labels.emplace_back(std::move(cur_labels));
// Allocate the jump table. Must be manually allocated to prevent the address from changing.
void** cur_jump_table = new void*[jtbl.entries.size()];
context->jump_tables.emplace_back(cur_jump_table);
// Allocate the jump table.
std::unique_ptr<void* []> cur_jump_table = std::make_unique<void* []>(jtbl.entries.size());
/// Codegen
@ -1423,18 +1540,25 @@ void N64Recomp::LiveGenerator::emit_switch(const JumpTable& jtbl, int reg) const
// Multiply the jump table addend by 2 to get the addend for the real jump table. (4 bytes per entry to 8 bytes per entry).
sljit_emit_op2(compiler, SLJIT_ADD, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0, Registers::arithmetic_temp1, 0);
// Load the real jump table address.
sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp2, 0, SLJIT_IMM, (sljit_sw)cur_jump_table);
sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp2, 0, SLJIT_IMM, (sljit_sw)cur_jump_table.get());
// Load the real jump entry.
sljit_emit_op1(compiler, SLJIT_MOV, Registers::arithmetic_temp1, 0, SLJIT_MEM2(Registers::arithmetic_temp1, Registers::arithmetic_temp2), 0);
// Jump to the loaded entry.
sljit_emit_ijump(compiler, SLJIT_JUMP, Registers::arithmetic_temp1, 0);
// Move the jump table into the pending jump tables.
context->pending_jump_tables.emplace_back(std::move(cur_jump_table));
}
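
The addend doubling exists because guest jump-table entries are 4 bytes while host pointers are 8. A rough model of the dispatch this emits, assuming a 64-bit host (illustrative, not the actual codegen):

#include <cstddef>
#include <cstdint>

void* switch_target(void* const* host_jump_table, uint32_t guest_addend_bytes) {
    // A 4-bytes-per-entry offset doubled becomes an 8-bytes-per-entry offset.
    size_t host_byte_offset = size_t(guest_addend_bytes) * 2;
    return host_jump_table[host_byte_offset / sizeof(void*)];
}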
void N64Recomp::LiveGenerator::emit_case(int case_index, const std::string& target_label) const {
(void)case_index;
(void)target_label;
// Nothing to do here, the jump table is built in emit_switch.
}
void N64Recomp::LiveGenerator::emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const {
(void)instr_vram;
(void)jtbl_vram;
// Nothing to do here, the jump table is built in emit_switch.
}
@ -1447,10 +1571,13 @@ void N64Recomp::LiveGenerator::emit_return() const {
}
void N64Recomp::LiveGenerator::emit_check_fr(int fpr) const {
(void)fpr;
// Nothing to do here.
}
void N64Recomp::LiveGenerator::emit_check_nan(int fpr, bool is_double) const {
(void)fpr;
(void)is_double;
// Nothing to do here.
}
@ -1704,6 +1831,7 @@ void N64Recomp::LiveGenerator::emit_trigger_event(uint32_t event_index) const {
}
void N64Recomp::LiveGenerator::emit_comment(const std::string& comment) const {
(void)comment;
// Nothing to do here.
}

View file

@ -188,6 +188,8 @@ namespace N64Recomp {
std::vector<ReferenceSymbol> reference_symbols;
// Mapping of symbol name to reference symbol index.
std::unordered_map<std::string, SymbolReference> reference_symbols_by_name;
// Whether all reference sections should be treated as relocatable (used in live recompilation).
bool all_reference_sections_relocatable = false;
public:
std::vector<Section> sections;
std::vector<Function> functions;
@ -200,6 +202,8 @@ namespace N64Recomp {
// The target ROM being recompiled, TODO move this outside of the context to avoid making a copy for mod contexts.
// Used for reading relocations and for the output binary feature.
std::vector<uint8_t> rom;
// Whether reference symbols should be validated when emitting function calls during recompilation.
bool skip_validating_reference_symbols = true;
//// Only used by the CLI, TODO move this to a struct in the internal headers.
// A mapping of function name to index in the functions vector
@ -372,6 +376,9 @@ namespace N64Recomp {
}
bool is_reference_section_relocatable(uint16_t section_index) const {
if (all_reference_sections_relocatable) {
return true;
}
if (section_index == SectionAbsolute) {
return false;
}
@ -531,6 +538,10 @@ namespace N64Recomp {
void copy_reference_sections_from(const Context& rhs) {
reference_sections = rhs.reference_sections;
}
void set_all_reference_sections_relocatable() {
all_reference_sections_relocatable = true;
}
};
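
A hypothetical setup sketch for a live-recompilation caller; make_live_context is a placeholder name, and only the two members it touches come from this change:

N64Recomp::Context make_live_context() {
    N64Recomp::Context context{};
    // Route every reference section through the relocatable path so calls can
    // be patched at runtime instead of having addresses baked in.
    context.set_all_reference_sections_relocatable();
    context.skip_validating_reference_symbols = true; // no reference symbol list to check against
    return context;
}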
class Generator;

View file

@ -34,7 +34,9 @@ namespace N64Recomp {
virtual void emit_function_end() const = 0;
virtual void emit_function_call_lookup(uint32_t addr) const = 0;
virtual void emit_function_call_by_register(int reg) const = 0;
virtual void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const = 0;
// target_section_offset could be deduced from symbol_index if the full context were available,
// but for live recompilation the reference symbol list is unavailable, so it's passed explicitly.
virtual void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const = 0;
virtual void emit_function_call(const Context& context, size_t function_index) const = 0;
virtual void emit_goto(const std::string& target) const = 0;
virtual void emit_label(const std::string& label_name) const = 0;
@ -70,7 +72,7 @@ namespace N64Recomp {
void emit_function_end() const final;
void emit_function_call_lookup(uint32_t addr) const final;
void emit_function_call_by_register(int reg) const final;
void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const final;
void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
void emit_function_call(const Context& context, size_t function_index) const final;
void emit_goto(const std::string& target) const final;
void emit_label(const std::string& label_name) const final;

View file

@ -1,6 +1,7 @@
#ifndef __LIVE_RECOMPILER_H__
#define __LIVE_RECOMPILER_H__
#include <unordered_map>
#include "recompiler/generator.h"
#include "recomp.h"
@ -8,6 +9,10 @@ struct sljit_compiler;
namespace N64Recomp {
struct LiveGeneratorContext;
struct ReferenceJumpDetails {
uint16_t section;
uint32_t section_offset;
};
struct LiveGeneratorOutput {
LiveGeneratorOutput() = default;
LiveGeneratorOutput(const LiveGeneratorOutput& rhs) = delete;
@ -20,29 +25,46 @@ namespace N64Recomp {
code = rhs.code;
code_size = rhs.code_size;
functions = std::move(rhs.functions);
reference_symbol_jumps = std::move(rhs.reference_symbol_jumps);
import_jumps_by_index = std::move(rhs.import_jumps_by_index);
executable_offset = rhs.executable_offset;
rhs.good = false;
rhs.string_literals.clear();
rhs.jump_tables.clear();
rhs.code = nullptr;
rhs.code_size = 0;
rhs.reference_symbol_jumps.clear();
rhs.executable_offset = 0;
return *this;
}
~LiveGeneratorOutput();
size_t num_reference_symbol_jumps() const;
void set_reference_symbol_jump(size_t jump_index, recomp_func_t* func);
ReferenceJumpDetails get_reference_symbol_jump_details(size_t jump_index);
void populate_import_symbol_jumps(size_t import_index, recomp_func_t* func);
bool good = false;
// Storage for string literals referenced by recompiled code. These must be manually allocated to prevent
// them from moving, as the referenced address is baked into the recompiled code.
std::vector<const char*> string_literals;
// Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These must also be manually allocated
// for the same reason as strings.
std::vector<void**> jump_tables;
// Storage for string literals referenced by recompiled code. These are allocated as unique_ptr arrays
// to prevent them from moving, as the referenced address is baked into the recompiled code.
std::vector<std::unique_ptr<char[]>> string_literals;
// Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These are also
// allocated as unique_ptr arrays for the same reason as strings.
std::vector<std::unique_ptr<void*[]>> jump_tables;
// Recompiled code.
void* code;
// Size of the recompiled code.
size_t code_size;
// Pointers to each individual function within the recompiled code.
std::vector<recomp_func_t*> functions;
private:
// List of jump details and the corresponding jump instruction address. These jumps get populated after recompilation is complete
// during dependency resolution.
std::vector<std::pair<ReferenceJumpDetails, void*>> reference_symbol_jumps;
// Mapping of import symbol index to any jumps to that import symbol.
std::unordered_multimap<size_t, void*> import_jumps_by_index;
// sljit executable offset.
int64_t executable_offset;
friend class LiveGenerator;
};
struct LiveGeneratorInputs {
uint32_t base_event_index;
@ -75,7 +97,7 @@ namespace N64Recomp {
void emit_function_end() const final;
void emit_function_call_lookup(uint32_t addr) const final;
void emit_function_call_by_register(int reg) const final;
void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const final;
void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
void emit_function_call(const Context& context, size_t function_index) const final;
void emit_goto(const std::string& target) const final;
void emit_label(const std::string& label_name) const final;
@ -103,7 +125,7 @@ namespace N64Recomp {
void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
// Loads the relocated address specified by the instruction context into R0.
// Loads the relocated address specified by the instruction context into the target register.
void load_relocated_address(const InstructionContext& ctx, int reg) const;
sljit_compiler* compiler;
LiveGeneratorInputs inputs;

View file

@ -99,6 +99,12 @@ namespace N64Recomp {
LessEq,
Greater,
GreaterEq,
EqualF32,
LessF32,
LessEqF32,
EqualF64,
LessF64,
LessEqF64,
// Loads
LD,
LW,

View file

@ -45,9 +45,15 @@ static std::vector<BinaryOpFields> c_op_fields = []() {
setup_op(N64Recomp::BinaryOpType::Sra32, "S32", ">>"); // Arithmetic aspect will be taken care of by unary op for first operand.
setup_op(N64Recomp::BinaryOpType::Sra64, "", ">>"); // Arithmetic aspect will be taken care of by unary op for first operand.
setup_op(N64Recomp::BinaryOpType::Equal, "", "==");
setup_op(N64Recomp::BinaryOpType::EqualF32, "", "==");
setup_op(N64Recomp::BinaryOpType::EqualF64, "", "==");
setup_op(N64Recomp::BinaryOpType::NotEqual, "", "!=");
setup_op(N64Recomp::BinaryOpType::Less, "", "<");
setup_op(N64Recomp::BinaryOpType::LessF32, "", "<");
setup_op(N64Recomp::BinaryOpType::LessF64, "", "<");
setup_op(N64Recomp::BinaryOpType::LessEq, "", "<=");
setup_op(N64Recomp::BinaryOpType::LessEqF32, "", "<=");
setup_op(N64Recomp::BinaryOpType::LessEqF64, "", "<=");
setup_op(N64Recomp::BinaryOpType::Greater, "", ">");
setup_op(N64Recomp::BinaryOpType::GreaterEq, "", ">=");
setup_op(N64Recomp::BinaryOpType::LD, "LD", "");
@ -407,7 +413,8 @@ void N64Recomp::CGenerator::emit_function_call_by_register(int reg) const {
fmt::print(output_file, "LOOKUP_FUNC({})(rdram, ctx);\n", gpr_to_string(reg));
}
void N64Recomp::CGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index) const {
void N64Recomp::CGenerator::emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const {
(void)target_section_offset;
const N64Recomp::ReferenceSymbol& sym = context.get_reference_symbol(section_index, symbol_index);
fmt::print(output_file, "{}(rdram, ctx);\n", sym.name);
}

View file

@ -99,33 +99,33 @@ namespace N64Recomp {
{ InstrId::cpu_div_s, { BinaryOpType::DivFloat, Operand::Fd, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true, true } },
{ InstrId::cpu_div_d, { BinaryOpType::DivDouble, Operand::FdDouble, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true, true } },
// Float comparisons. TODO: remaining operations; investigate ordered/unordered comparisons and default values.
{ InstrId::cpu_c_lt_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_nge_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_olt_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ult_s, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_lt_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_nge_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_olt_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ult_d, { BinaryOpType::Less, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_lt_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_nge_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_olt_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ult_s, { BinaryOpType::LessF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_lt_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_nge_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_olt_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ult_d, { BinaryOpType::LessF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_le_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngt_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ole_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ule_s, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_le_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngt_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ole_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ule_d, { BinaryOpType::LessEq, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_le_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngt_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ole_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ule_s, { BinaryOpType::LessEqF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_le_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngt_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ole_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ule_d, { BinaryOpType::LessEqF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_eq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ueq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngl_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_seq_s, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_eq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ueq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngl_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_eq_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ueq_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_ngl_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_seq_s, { BinaryOpType::EqualF32, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Fs, Operand::Ft }}, true } },
{ InstrId::cpu_c_eq_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ueq_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_ngl_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
/* TODO rename to c_seq_d when fixed in rabbitizer */
{ InstrId::cpu_c_deq_d, { BinaryOpType::Equal, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
{ InstrId::cpu_c_deq_d, { BinaryOpType::EqualF64, Operand::Cop1cs, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::FsDouble, Operand::FtDouble }}, true } },
// Loads
{ InstrId::cpu_ld, { BinaryOpType::LD, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
{ InstrId::cpu_lw, { BinaryOpType::LW, Operand::Rt, {{ UnaryOpType::None, UnaryOpType::None }, { Operand::Base, Operand::ImmS16 }}} },
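
For the C generator the split is purely nominal, since both precisions print the same operator, but it lets the live recompiler choose the SLJIT compare width. Roughly (sketch):

#include <cstdint>

// Both emit "<" in C output, but now carry their operand width.
uint32_t c_lt_s(float fs, float ft)   { return fs < ft ? 1 : 0; } // LessF32 -> SLJIT_CMP_F32
uint32_t c_lt_d(double fs, double ft) { return fs < ft ? 1 : 0; } // LessF64 -> SLJIT_CMP_F64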

View file

@ -28,7 +28,6 @@ JalResolutionResult resolve_jal(const N64Recomp::Context& context, size_t cur_se
uint32_t section_vram_start = cur_section.ram_addr;
uint32_t section_vram_end = cur_section.ram_addr + cur_section.size;
bool in_current_section = target_func_vram >= section_vram_start && target_func_vram < section_vram_end;
bool needs_static = false;
bool exact_match_found = false;
// Use a thread local to prevent reallocation across runs and to allow multi-threading in the future.
@ -183,9 +182,9 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
// Don't try to relocate special section symbols.
if (context.is_regular_reference_section(reloc.target_section) || reloc_section == N64Recomp::SectionAbsolute) {
bool ref_section_relocatable = context.is_reference_section_relocatable(reloc.target_section);
uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
// Resolve HI16 and LO16 reference symbol relocs to non-relocatable sections by patching the instruction immediate.
if (!ref_section_relocatable && (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16)) {
uint32_t ref_section_vram = context.get_reference_section_vram(reloc.target_section);
uint32_t full_immediate = reloc.target_section_offset + ref_section_vram;
if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16) {
@ -264,7 +263,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
return true;
};
auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &output_file, &print_link_branch]
auto print_func_call_by_address = [&generator, reloc_target_section_offset, reloc_section, reloc_reference_symbol, reloc_type, &context, &func, &static_funcs_out, &needs_link_branch, &print_indent, &process_delay_slot, &print_link_branch]
(uint32_t target_func_vram, bool tail_call = false, bool indent = false)
{
bool call_by_lookup = false;
@ -286,18 +285,19 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
std::string jal_target_name{};
size_t matched_func_index = (size_t)-1;
if (reloc_reference_symbol != (size_t)-1) {
const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
if (reloc_type != N64Recomp::RelocType::R_MIPS_26) {
fmt::print(stderr, "Unsupported reloc type {} on jal instruction in {}\n", (int)reloc_type, func.name);
return false;
}
if (!context.skip_validating_reference_symbols) {
const auto& ref_symbol = context.get_reference_symbol(reloc_section, reloc_reference_symbol);
if (ref_symbol.section_offset != reloc_target_section_offset) {
fmt::print(stderr, "Function {} uses a MIPS_R_26 addend, which is not supported yet\n", func.name);
return false;
}
}
}
else {
JalResolutionResult jal_result = resolve_jal(context, func.section_index, target_func_vram, matched_func_index);
@ -336,7 +336,7 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
}
print_indent();
if (reloc_reference_symbol != (size_t)-1) {
generator.emit_function_call_reference_symbol(context, reloc_section, reloc_reference_symbol);
generator.emit_function_call_reference_symbol(context, reloc_section, reloc_reference_symbol, reloc_target_section_offset);
}
else if (call_by_lookup) {
generator.emit_function_call_lookup(target_func_vram);
@ -392,7 +392,6 @@ bool process_instruction(GeneratorType& generator, const N64Recomp::Context& con
int rd = (int)instr.GetO32_rd();
int rs = (int)instr.GetO32_rs();
int base = rs;
int rt = (int)instr.GetO32_rt();
int sa = (int)instr.Get_sa();