Merge branch 'main' into pic-jump-table

# Conflicts: # src/analysis.cpp # src/analysis.h # src/recompilation.cpp
2025-06-20 01:20:36 +00:00 · 2025-01-12 22:55:46 -05:00 · 2025-01-12 22:55:46 -05:00 · 331217ab98
commit 331217ab98
parent 1155735bf6 49bf144b0d
26 changed files with 3604 additions and 447 deletions
--- a/include/generator.h
+++ b/include/generator.h
@ -1,56 +0,0 @@
-#ifndef __GENERATOR_H__
-#define __GENERATOR_H__
-
-#include "n64recomp.h"
-#include "operations.h"
-
-namespace N64Recomp {
-    struct InstructionContext {
-        int rd;
-        int rs;
-        int rt;
-        int sa;
-
-        int fd;
-        int fs;
-        int ft;
-
-        int cop1_cs;
-
-        uint16_t imm16;
-
-        bool reloc_tag_as_reference;
-        RelocType reloc_type;
-        uint32_t reloc_section_index;
-        uint32_t reloc_target_section_offset;
-    };
-
-    class Generator {
-    public:
-        virtual void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const = 0;
-        virtual void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const = 0;
-        virtual void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const = 0;
-        virtual void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0;
-        virtual void emit_branch_close(std::ostream& output_file) const = 0;
-        virtual void emit_check_fr(std::ostream& output_file, int fpr) const = 0;
-        virtual void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const = 0;
-    };
-
-    class CGenerator final : Generator {
-    public:
-        CGenerator() = default;
-        void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const final;
-        void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const final;
-        void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const final;
-        void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
-        void emit_branch_close(std::ostream& output_file) const final;
-        void emit_check_fr(std::ostream& output_file, int fpr) const final;
-        void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const final;
-    private:
-        void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
-        void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
-        void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
-    };
-}
-
-#endif
--- a/include/recomp.h
+++ b/include/recomp.h
@ -0,0 +1,397 @@
+#ifndef __RECOMP_H__
+#define __RECOMP_H__
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <fenv.h>
+#include <assert.h>
+
+// Compiler definition to disable inter-procedural optimization, allowing multiple functions to be in a single file without breaking interposition.
+#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+    // MSVC's __declspec(noinline) seems to disable inter-procedural optimization entirely, so it's all that's needed.
+    #define RECOMP_FUNC __declspec(noinline)
+    
+    // Use MSVC's fenv_access pragma.
+    #define SET_FENV_ACCESS() _Pragma("fenv_access(on)")
+#elif defined(__clang__)
+    // Clang has no dedicated IPO attribute, so we use a combination of other attributes to give the desired behavior.
+    // The inline keyword allows multiple definitions during linking, and extern forces clang to emit an externally visible definition.
+    // Weak forces Clang to not perform any IPO as the symbol can be interposed, which prevents actual inlining due to the inline keyword.
+    // Add noinline for good measure, which doesn't conflict with the inline keyword as they have different meanings.
+    #define RECOMP_FUNC extern inline __attribute__((weak,noinline))
+
+    // Use the standard STDC FENV_ACCESS pragma.
+    #define SET_FENV_ACCESS() _Pragma("STDC FENV_ACCESS ON")
+#elif defined(__GNUC__) && !defined(__INTEL_COMPILER)
+    // Use GCC's attribute for disabling inter-procedural optimizations. Also enable the rounding-math compiler flag to disable
+    // constant folding so that arithmetic respects the floating point environment. This is needed because gcc doesn't implement
+    // any FENV_ACCESS pragma.
+    #define RECOMP_FUNC __attribute__((noipa, optimize("rounding-math")))
+
+    // There's no FENV_ACCESS pragma in gcc, so this can be empty.
+    #define SET_FENV_ACCESS()
+#else
+    #error "No RECOMP_FUNC definition for this compiler"
+#endif
+
+// Implementation of 64-bit multiply and divide instructions
+#if defined(__SIZEOF_INT128__)
+
+// Signed 64x64 -> 128-bit multiply, used when the compiler provides __int128.
+// Writes the low 64 bits of the product to *lo64 and the high 64 bits to *hi64.
+static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) {
+    __int128 full128 = ((__int128)a) * ((__int128)b);
+
+    *hi64 = (int64_t)(full128 >> 64);
+    *lo64 = (int64_t)(full128 >> 0);
+}
+
+// Unsigned 64x64 -> 128-bit multiply, used when the compiler provides __int128.
+// Writes the low 64 bits of the product to *lo64 and the high 64 bits to *hi64.
+static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) {
+    unsigned __int128 full128 = ((unsigned __int128)a) * ((unsigned __int128)b);
+
+    *hi64 = (uint64_t)(full128 >> 64);
+    *lo64 = (uint64_t)(full128 >> 0);
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+#pragma intrinsic(_mul128)
+#pragma intrinsic(_umul128)
+
+// MSVC fallback for the signed 64x64 -> 128-bit multiply: the intrinsic's return value is stored
+// as the low half and hi64 is handed to the intrinsic to receive the high half.
+static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) {
+    *lo64 = _mul128(a, b, hi64);
+}
+
+// MSVC fallback for the unsigned 64x64 -> 128-bit multiply: the intrinsic's return value is stored
+// as the low half and hi64 is handed to the intrinsic to receive the high half.
+static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) {
+    *lo64 = _umul128(a, b, hi64);
+}
+
+#else
+#error "128-bit integer type not found"
+#endif
+
+// Signed 64-bit divide that produces both the quotient and the remainder.
+// The (most negative value) / -1 case is special-cased to quotient = a, remainder = 0; the
+// conditional operators ensure the host division is never evaluated for that input.
+// NOTE(review): b == 0 is not guarded, so the host division and modulo by zero are reached —
+// confirm that callers rule it out.
+static inline void DDIV(int64_t a, int64_t b, int64_t* quot, int64_t* rem) {
+    int overflow = ((uint64_t)a == 0x8000000000000000ull) && (b == -1ll);
+    *quot = overflow ? a : (a / b);
+    *rem = overflow ? 0 : (a % b);
+}
+
+// Unsigned 64-bit divide that produces both the quotient and the remainder.
+// NOTE(review): b == 0 is not guarded, so the host division and modulo by zero are reached —
+// confirm that callers rule it out.
+static inline void DDIVU(uint64_t a, uint64_t b, uint64_t* quot, uint64_t* rem) {
+    *quot = a / b;
+    *rem = a % b;
+}
+
+typedef uint64_t gpr;
+
+#define SIGNED(val) \
+    ((int64_t)(val))
+
+#define ADD32(a, b) \
+    ((gpr)(int32_t)((a) + (b)))
+
+#define SUB32(a, b) \
+    ((gpr)(int32_t)((a) - (b)))
+
+// Guest memory accessors. Each one expands to an lvalue of the named width located at
+// rdram + ((reg + offset) - 0xFFFFFFFF80000000), so a variable called `rdram` must be in scope
+// wherever they are used. W/H/B are signed views, HU/BU are unsigned views.
+// The halfword and byte variants XOR the computed address with 2 and 3 respectively before rebasing
+// (presumably because rdram keeps each 32-bit word in host byte order — TODO confirm).
+#define MEM_W(offset, reg) \
+    (*(int32_t*)(rdram + ((((reg) + (offset))) - 0xFFFFFFFF80000000)))
+
+#define MEM_H(offset, reg) \
+    (*(int16_t*)(rdram + ((((reg) + (offset)) ^ 2) - 0xFFFFFFFF80000000)))
+
+#define MEM_B(offset, reg) \
+    (*(int8_t*)(rdram + ((((reg) + (offset)) ^ 3) - 0xFFFFFFFF80000000)))
+
+#define MEM_HU(offset, reg) \
+    (*(uint16_t*)(rdram + ((((reg) + (offset)) ^ 2) - 0xFFFFFFFF80000000)))
+
+#define MEM_BU(offset, reg) \
+    (*(uint8_t*)(rdram + ((((reg) + (offset)) ^ 3) - 0xFFFFFFFF80000000)))
+
+// Stores a 64-bit value as two 32-bit words: the upper 32 bits at (reg + offset + 0) and the lower
+// 32 bits at (reg + offset + 4), using the same address rebasing as MEM_W.
+// Note that this expands to a brace-enclosed block rather than an expression, and that `val`,
+// `offset` and `reg` are each evaluated twice.
+#define SD(val, offset, reg) { \
+    *(uint32_t*)(rdram + ((((reg) + (offset) + 4)) - 0xFFFFFFFF80000000)) = (uint32_t)((gpr)(val) >> 0); \
+    *(uint32_t*)(rdram + ((((reg) + (offset) + 0)) - 0xFFFFFFFF80000000)) = (uint32_t)((gpr)(val) >> 32); \
+}
+
+// Loads a 64-bit value from guest memory as two 32-bit words: the word at +0 becomes the upper
+// half and the word at +4 becomes the lower half (the mirror image of SD).
+// The parameter names are crossed relative to the call sites: LD passes (offset, reg) into the
+// parameters named (reg, offset), and MEM_W is then invoked with them in the opposite order again.
+// Because MEM_W only ever adds its two arguments together, the computed address is unaffected.
+static inline uint64_t load_doubleword(uint8_t* rdram, gpr reg, gpr offset) {
+    uint64_t ret = 0;
+    // Each word is zero-extended via uint32_t before widening so the halves can be OR'd together.
+    uint64_t lo = (uint64_t)(uint32_t)MEM_W(reg, offset + 4);
+    uint64_t hi = (uint64_t)(uint32_t)MEM_W(reg, offset + 0);
+    ret = (lo << 0) | (hi << 32);
+    return ret;
+}
+
+// Expression form of a 64-bit load; requires `rdram` to be in scope.
+#define LD(offset, reg) \
+    load_doubleword(rdram, offset, reg)
+
+// Merge step of an unaligned "load word left" (the name suggests the MIPS LWL instruction).
+// Reads the aligned word containing (offset + reg), shifts it left by 8 bits per byte of
+// misalignment, and fills the vacated low bits from the low 32 bits of initial_value.
+// Returns the merged 32-bit result sign-extended to the width of gpr.
+static inline gpr do_lwl(uint8_t* rdram, gpr initial_value, gpr offset, gpr reg) {
+    // Calculate the overall address
+    gpr address = (offset + reg);
+
+    // Load the aligned word
+    gpr word_address = address & ~0x3;
+    uint32_t loaded_value = MEM_W(0, word_address);
+
+    // Mask the existing value and shift the loaded value appropriately
+    gpr misalignment = address & 0x3;
+    gpr masked_value = initial_value & (gpr)(uint32_t)~(0xFFFFFFFFu << (misalignment * 8));
+    loaded_value <<= (misalignment * 8);
+
+    // Cast to int32_t to sign extend first
+    return (gpr)(int32_t)(masked_value | loaded_value);
+}
+
+// Merge step of an unaligned "load word right" (the name suggests the MIPS LWR instruction).
+// Reads the aligned word containing (offset + reg), shifts it right by (24 - 8 * misalignment)
+// bits, and fills the vacated high bits from the low 32 bits of initial_value.
+// Returns the merged 32-bit result sign-extended to the width of gpr.
+static inline gpr do_lwr(uint8_t* rdram, gpr initial_value, gpr offset, gpr reg) {
+    // Calculate the overall address
+    gpr address = (offset + reg);
+    
+    // Load the aligned word
+    gpr word_address = address & ~0x3;
+    uint32_t loaded_value = MEM_W(0, word_address);
+
+    // Mask the existing value and shift the loaded value appropriately
+    gpr misalignment = address & 0x3;
+    gpr masked_value = initial_value & (gpr)(uint32_t)~(0xFFFFFFFFu >> (24 - misalignment * 8));
+    loaded_value >>= (24 - misalignment * 8);
+
+    // Cast to int32_t to sign extend first
+    return (gpr)(int32_t)(masked_value | loaded_value);
+}
+
+// Unaligned "store word left" (the name suggests the MIPS SWL instruction).
+// Read-modify-writes the aligned word containing (offset + reg): the top 8 * misalignment bits of
+// the existing word are preserved and the rest is replaced by the low 32 bits of val shifted right
+// by the same amount.
+static inline void do_swl(uint8_t* rdram, gpr offset, gpr reg, gpr val) {
+    // Calculate the overall address
+    gpr address = (offset + reg);
+
+    // Get the initial value of the aligned word
+    gpr word_address = address & ~0x3;
+    uint32_t initial_value = MEM_W(0, word_address);
+
+    // Mask the initial value and shift the input value appropriately
+    gpr misalignment = address & 0x3;
+    uint32_t masked_initial_value = initial_value & ~(0xFFFFFFFFu >> (misalignment * 8));
+    uint32_t shifted_input_value = ((uint32_t)val) >> (misalignment * 8);
+    MEM_W(0, word_address) = masked_initial_value | shifted_input_value;
+}
+
+// Unaligned "store word right" (the name suggests the MIPS SWR instruction).
+// Read-modify-writes the aligned word containing (offset + reg): the low (24 - 8 * misalignment)
+// bits of the existing word are preserved and the rest is replaced by the low 32 bits of val
+// shifted left by the same amount.
+static inline void do_swr(uint8_t* rdram, gpr offset, gpr reg, gpr val) {
+    // Calculate the overall address
+    gpr address = (offset + reg);
+
+    // Get the initial value of the aligned word
+    gpr word_address = address & ~0x3;
+    uint32_t initial_value = MEM_W(0, word_address);
+
+    // Mask the initial value and shift the input value appropriately
+    gpr misalignment = address & 0x3;
+    uint32_t masked_initial_value = initial_value & ~(0xFFFFFFFFu << (24 - misalignment * 8));
+    uint32_t shifted_input_value = ((uint32_t)val) << (24 - misalignment * 8);
+    MEM_W(0, word_address) = masked_initial_value | shifted_input_value;
+}
+
+// Returns the host's current floating point rounding mode (from fegetround) encoded as a value in
+// the range 0-3. Only the rounding mode is reported; every other bit of the result is zero.
+// Any host mode that isn't one of the four listed is reported as 0 (round to nearest).
+static inline uint32_t get_cop1_cs() {
+    uint32_t rounding_mode = 0;
+    switch (fegetround()) {
+        // round to nearest value
+        case FE_TONEAREST:
+        default:
+            rounding_mode = 0;
+            break;
+        // round to zero (truncate)
+        case FE_TOWARDZERO:
+            rounding_mode = 1;
+            break;
+        // round to positive infinity (ceil)
+        case FE_UPWARD:
+            rounding_mode = 2;
+            break;
+        // round to negative infinity (floor)
+        case FE_DOWNWARD:
+            rounding_mode = 3;
+            break;
+    }
+    return rounding_mode;
+}
+
+// Applies the rounding mode held in the low two bits of val to the host floating point environment
+// via fesetround. All other bits of val are ignored, and fesetround's return value is not checked.
+// The 0-3 encoding is the inverse of the mapping in get_cop1_cs.
+static inline void set_cop1_cs(uint32_t val) {
+    uint32_t rounding_mode = val & 0x3;
+    int round = FE_TONEAREST;
+    switch (rounding_mode) {
+        case 0: // round to nearest value
+            round = FE_TONEAREST;
+            break;
+        case 1: // round to zero (truncate)
+            round = FE_TOWARDZERO;
+            break;
+        case 2: // round to positive infinity (ceil)
+            round = FE_UPWARD;
+            break;
+        case 3: // round to negative infinity (floor)
+            round = FE_DOWNWARD;
+            break;
+    }
+    fesetround(round);
+}
+
+#define S32(val) \
+    ((int32_t)(val))
+    
+#define U32(val) \
+    ((uint32_t)(val))
+
+#define S64(val) \
+    ((int64_t)(val))
+
+#define U64(val) \
+    ((uint64_t)(val))
+
+#define MUL_S(val1, val2) \
+    ((val1) * (val2))
+
+#define MUL_D(val1, val2) \
+    ((val1) * (val2))
+
+#define DIV_S(val1, val2) \
+    ((val1) / (val2))
+
+#define DIV_D(val1, val2) \
+    ((val1) / (val2))
+
+#define CVT_S_W(val) \
+    ((float)((int32_t)(val)))
+
+#define CVT_D_W(val) \
+    ((double)((int32_t)(val)))
+
+#define CVT_D_L(val) \
+    ((double)((int64_t)(val)))
+
+#define CVT_S_L(val) \
+    ((float)((int64_t)(val)))
+
+#define CVT_D_S(val) \
+    ((double)(val))
+
+#define CVT_S_D(val) \
+    ((float)(val))
+
+#define TRUNC_W_S(val) \
+    ((int32_t)(val))
+
+#define TRUNC_W_D(val) \
+    ((int32_t)(val))
+
+#define TRUNC_L_S(val) \
+    ((int64_t)(val))
+
+#define TRUNC_L_D(val) \
+    ((int64_t)(val))
+
+#define DEFAULT_ROUNDING_MODE 0
+
+// Converts a float to a 32-bit integer using the current rounding mode (via lrintf).
+// lrintf returns a long, which is then narrowed to int32_t; inputs outside the representable
+// range are not checked here.
+static inline int32_t do_cvt_w_s(float val) {
+    // Rounding mode aware float to 32-bit int conversion.
+    return (int32_t)lrintf(val);
+}
+
+#define CVT_W_S(val) \
+    do_cvt_w_s(val)
+
+// Converts a float to a 64-bit integer using the current rounding mode (via llrintf).
+// Inputs outside the representable range are not checked here.
+static inline int64_t do_cvt_l_s(float val) {
+    // Rounding mode aware float to 64-bit int conversion.
+    return (int64_t)llrintf(val);
+}
+
+// Rounding mode aware float to 64-bit int conversion. Like the other CVT_* macros this expands to a
+// bare expression (no trailing semicolon) so that it can be nested inside larger expressions.
+#define CVT_L_S(val) \
+    do_cvt_l_s(val)
+
+// Converts a double to a 32-bit integer using the current rounding mode (via lrint).
+// lrint returns a long, which is then narrowed to int32_t; inputs outside the representable
+// range are not checked here.
+static inline int32_t do_cvt_w_d(double val) {
+    // Rounding mode aware double to 32-bit int conversion.
+    return (int32_t)lrint(val);
+}
+
+#define CVT_W_D(val) \
+    do_cvt_w_d(val)
+
+// Converts a double to a 64-bit integer using the current rounding mode (via llrint).
+// Inputs outside the representable range are not checked here.
+static inline int64_t do_cvt_l_d(double val) {
+    // Rounding mode aware double to 64-bit int conversion.
+    return (int64_t)llrint(val);
+}
+
+#define CVT_L_D(val) \
+    do_cvt_l_d(val)
+
+#define NAN_CHECK(val) \
+    assert(val == val)
+
+//#define NAN_CHECK(val)
+
+// Storage for one 64-bit floating point register, viewable in several overlapping ways:
+// as a double, as a pair of floats, as a pair of 32-bit integers, or as one 64-bit integer.
+// fl/u32l alias the first four bytes of the storage and fh/u32h alias the following four; which of
+// those corresponds to the low half of `d`/`u64` depends on host byte order (presumably
+// little-endian hosts are assumed, given the l/h naming — TODO confirm).
+typedef union {
+    double d;
+    struct {
+        float fl;
+        float fh;
+    };
+    struct {
+        uint32_t u32l;
+        uint32_t u32h;
+    };
+    uint64_t u64;
+} fpr;
+
+// Register state that is passed to every recompiled function (see recomp_func_t).
+typedef struct {
+    // General purpose registers, 64 bits each.
+    gpr r0,  r1,  r2,  r3,  r4,  r5,  r6,  r7,
+        r8,  r9,  r10, r11, r12, r13, r14, r15,
+        r16, r17, r18, r19, r20, r21, r22, r23,
+        r24, r25, r26, r27, r28, r29, r30, r31;
+    // Floating point registers.
+    fpr f0,  f1,  f2,  f3,  f4,  f5,  f6,  f7,
+        f8,  f9,  f10, f11, f12, f13, f14, f15,
+        f16, f17, f18, f19, f20, f21, f22, f23,
+        f24, f25, f26, f27, f28, f29, f30, f31;
+    // Multiply/divide result registers (presumably the targets of the DMULT/DDIV helpers — TODO confirm).
+    uint64_t hi, lo;
+    // NOTE(review): what f_odd points at is not visible in this header — confirm against its users.
+    uint32_t* f_odd;
+    // Presumably the value backing cop0_status_read/cop0_status_write — TODO confirm.
+    uint32_t status_reg;
+    // Non-zero when odd-numbered float registers may be targeted; consulted by CHECK_FR.
+    uint8_t mips3_float_mode;
+} recomp_context;
+
+// Checks if the target is an even float register or that mips3 float mode is enabled
+#define CHECK_FR(ctx, idx) \
+    assert(((idx) & 1) == 0 || (ctx)->mips3_float_mode)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void cop0_status_write(recomp_context* ctx, gpr value);
+gpr cop0_status_read(recomp_context* ctx);
+void switch_error(const char* func, uint32_t vram, uint32_t jtbl);
+void do_break(uint32_t vram);
+
+typedef void (recomp_func_t)(uint8_t* rdram, recomp_context* ctx);
+
+recomp_func_t* get_function(int32_t vram);
+
+#define LOOKUP_FUNC(val) \
+    get_function((int32_t)(val))
+
+extern int32_t* section_addresses;
+
+// Low 16 bits of x.
+#define LO16(x) \
+    ((x) & 0xFFFF)
+
+// Upper bits of x (x >> 16) plus bit 15 of x. The added bit presumably compensates for the low
+// half being sign-extended when the two halves are recombined (the usual MIPS %hi/%lo pairing —
+// TODO confirm). Note that the result is not masked to 16 bits and that x is evaluated twice.
+#define HI16(x) \
+    (((x) >> 16) + (((x) >> 15) & 1))
+
+#define RELOC_HI16(section_index, offset) \
+    HI16(section_addresses[section_index] + (offset))
+
+#define RELOC_LO16(section_index, offset) \
+    LO16(section_addresses[section_index] + (offset))
+
+void recomp_syscall_handler(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram);
+
+void pause_self(uint8_t *rdram);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/include/recompiler/context.h
+++ b/include/recompiler/context.h
@ -37,6 +37,21 @@ namespace N64Recomp {
                : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {}
        Function() = default;
    };
+    
+    // Describes one jump table together with the instructions that use it.
+    // NOTE(review): the per-field descriptions below are inferred from the field names only —
+    // confirm against the analysis code that fills this structure in.
+    struct JumpTable {
+        // Presumably the virtual address of the table itself.
+        uint32_t vram;
+        // Presumably the register added to the loaded table entry before jumping.
+        uint32_t addend_reg;
+        // Presumably the ROM address of the table.
+        uint32_t rom;
+        // Presumably the addresses of the lw, addu and jr instructions that make up the table jump.
+        uint32_t lw_vram;
+        uint32_t addu_vram;
+        uint32_t jr_vram;
+        // Index of the section associated with this table.
+        uint16_t section_index;
+        // Optional offset; presumably only set for tables reached through a global offset table.
+        std::optional<uint32_t> got_offset;
+        // The table's entries.
+        std::vector<uint32_t> entries;
+
+        // Initializes every field from the matching argument; `entries` is moved from.
+        JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, uint16_t section_index, std::optional<uint32_t> got_offset, std::vector<uint32_t>&& entries)
+                : vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), section_index(section_index), got_offset(got_offset), entries(std::move(entries)) {}
+    };

    enum class RelocType : uint8_t {
        R_MIPS_NONE = 0,
@ -177,6 +192,8 @@ namespace N64Recomp {
        std::vector<ReferenceSymbol> reference_symbols;
        // Mapping of symbol name to reference symbol index.
        std::unordered_map<std::string, SymbolReference> reference_symbols_by_name;
+        // Whether all reference sections should be treated as relocatable (used in live recompilation).
+        bool all_reference_sections_relocatable = false;
    public:
        std::vector<Section> sections;
        std::vector<Function> functions;
@ -189,6 +206,8 @@ namespace N64Recomp {
        // The target ROM being recompiled, TODO move this outside of the context to avoid making a copy for mod contexts.
        // Used for reading relocations and for the output binary feature.
        std::vector<uint8_t> rom;
+        // Whether validation of reference symbols should be skipped when emitting function calls during recompilation.
+        bool skip_validating_reference_symbols = true;

        //// Only used by the CLI, TODO move this to a struct in the internal headers.
        // A mapping of function name to index in the functions vector
@ -218,6 +237,9 @@ namespace N64Recomp {
        // List of symbols from events, which contains the names of events that this context provides.
        std::vector<EventSymbol> event_symbols;

+        // Causes functions to print their name to the console the first time they're called.
+        // Defaults to off so that a context which never sets it does not read an indeterminate value.
+        bool trace_mode = false;
+
        // Imports sections and function symbols from a provided context into this context's reference sections and reference functions.
        bool import_reference_context(const Context& reference_context);
        // Reads a data symbol file and adds its contents into this context's reference data symbols.
@ -358,6 +380,9 @@ namespace N64Recomp {
        }

        bool is_reference_section_relocatable(uint16_t section_index) const {
+            if (all_reference_sections_relocatable) {
+                return true;
+            }
            if (section_index == SectionAbsolute) {
                return false;
            }
@ -517,9 +542,15 @@ namespace N64Recomp {
        void copy_reference_sections_from(const Context& rhs) {
            reference_sections = rhs.reference_sections;
        }
+
+        // Marks every reference section as relocatable. Once set, is_reference_section_relocatable
+        // returns true regardless of the section index it is given.
+        void set_all_reference_sections_relocatable() {
+            all_reference_sections_relocatable = true;
+        }
    };

-    bool recompile_function(const Context& context, const Function& func, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs, bool tag_reference_relocs);
+    class Generator;
+    bool recompile_function(const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs, bool tag_reference_relocs);
+    bool recompile_function_custom(Generator& generator, const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs);

    enum class ModSymbolsError {
        Good,
--- a/include/recompiler/generator.h
+++ b/include/recompiler/generator.h
@ -0,0 +1,109 @@
+#ifndef __GENERATOR_H__
+#define __GENERATOR_H__
+
+#include "recompiler/context.h"
+#include "operations.h"
+
+namespace N64Recomp {
+    // Per-instruction operand data handed to every Generator callback.
+    // NOTE(review): the field descriptions below are inferred from the field names only — confirm
+    // against the code that populates this structure.
+    struct InstructionContext {
+        // Presumably the general purpose register fields of the instruction, plus the shift amount.
+        int rd;
+        int rs;
+        int rt;
+        int sa;
+
+        // Presumably the floating point register fields of the instruction.
+        int fd;
+        int fs;
+        int ft;
+
+        // Presumably the coprocessor 1 control register field.
+        int cop1_cs;
+
+        // 16-bit immediate operand.
+        uint16_t imm16;
+
+        // Relocation information attached to the instruction, if any.
+        bool reloc_tag_as_reference;
+        RelocType reloc_type;
+        uint32_t reloc_section_index;
+        uint32_t reloc_target_section_offset;
+    };
+
+    // Abstract code generation backend. The recompiler drives an implementation of this interface
+    // (see recompile_function_custom) by calling one method per emitted construct; CGenerator and
+    // LiveGenerator are the implementations declared alongside it.
+    // Every method is const, so implementations that accumulate state must hold it in mutable
+    // members or behind a reference.
+    class Generator {
+    public:
+        // Polymorphic base: the destructor is virtual so that destroying an implementation through a
+        // Generator pointer or reference runs the derived destructor.
+        virtual ~Generator() = default;
+        virtual void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const = 0;
+        virtual void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const = 0;
+        virtual void process_store_op(const StoreOp& op, const InstructionContext& ctx) const = 0;
+        virtual void emit_function_start(const std::string& function_name, size_t func_index) const = 0;
+        virtual void emit_function_end() const = 0;
+        virtual void emit_function_call_lookup(uint32_t addr) const = 0;
+        virtual void emit_function_call_by_register(int reg) const = 0;
+        // target_section_offset can be deduced from symbol_index if the full context is available,
+        // but for live recompilation the reference symbol list is unavailable so it's still provided.
+        virtual void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const = 0;
+        virtual void emit_function_call(const Context& context, size_t function_index) const = 0;
+        virtual void emit_named_function_call(const std::string& function_name) const = 0;
+        virtual void emit_goto(const std::string& target) const = 0;
+        virtual void emit_label(const std::string& label_name) const = 0;
+        virtual void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const = 0;
+        virtual void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0;
+        virtual void emit_branch_close() const = 0;
+        virtual void emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const = 0;
+        virtual void emit_case(int case_index, const std::string& target_label) const = 0;
+        virtual void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const = 0;
+        virtual void emit_switch_close() const = 0;
+        virtual void emit_return(const Context& context) const = 0;
+        virtual void emit_check_fr(int fpr) const = 0;
+        virtual void emit_check_nan(int fpr, bool is_double) const = 0;
+        virtual void emit_cop0_status_read(int reg) const = 0;
+        virtual void emit_cop0_status_write(int reg) const = 0;
+        virtual void emit_cop1_cs_read(int reg) const = 0;
+        virtual void emit_cop1_cs_write(int reg) const = 0;
+        virtual void emit_muldiv(InstrId instr_id, int reg1, int reg2) const = 0;
+        virtual void emit_syscall(uint32_t instr_vram) const = 0;
+        virtual void emit_do_break(uint32_t instr_vram) const = 0;
+        virtual void emit_pause_self() const = 0;
+        virtual void emit_trigger_event(uint32_t event_index) const = 0;
+        virtual void emit_comment(const std::string& comment) const = 0;
+    };
+
+    // Generator implementation that writes its output to the std::ostream supplied at construction.
+    // Only a reference to the stream is stored, so the stream must outlive the generator.
+    // Derives publicly from Generator (matching LiveGenerator) so that an instance can be passed
+    // wherever a Generator& is expected.
+    class CGenerator final : public Generator {
+    public:
+        CGenerator(std::ostream& output_file) : output_file(output_file) {}
+        void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const final;
+        void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const final;
+        void process_store_op(const StoreOp& op, const InstructionContext& ctx) const final;
+        void emit_function_start(const std::string& function_name, size_t func_index) const final;
+        void emit_function_end() const final;
+        void emit_function_call_lookup(uint32_t addr) const final;
+        void emit_function_call_by_register(int reg) const final;
+        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
+        void emit_function_call(const Context& context, size_t function_index) const final;
+        void emit_named_function_call(const std::string& function_name) const final;
+        void emit_goto(const std::string& target) const final;
+        void emit_label(const std::string& label_name) const final;
+        void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const final;
+        void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
+        void emit_branch_close() const final;
+        void emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const final;
+        void emit_case(int case_index, const std::string& target_label) const final;
+        void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const final;
+        void emit_switch_close() const final;
+        void emit_return(const Context& context) const final;
+        void emit_check_fr(int fpr) const final;
+        void emit_check_nan(int fpr, bool is_double) const final;
+        void emit_cop0_status_read(int reg) const final;
+        void emit_cop0_status_write(int reg) const final;
+        void emit_cop1_cs_read(int reg) const final;
+        void emit_cop1_cs_write(int reg) const final;
+        void emit_muldiv(InstrId instr_id, int reg1, int reg2) const final;
+        void emit_syscall(uint32_t instr_vram) const final;
+        void emit_do_break(uint32_t instr_vram) const final;
+        void emit_pause_self() const final;
+        void emit_trigger_event(uint32_t event_index) const final;
+        void emit_comment(const std::string& comment) const final;
+    private:
+        // String-building helpers; each one writes its result into the trailing std::string& argument(s).
+        void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
+        void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
+        void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
+        // Destination stream for everything this generator emits (not owned).
+        std::ostream& output_file;
+    };
+}
+
+#endif
--- a/include/recompiler/live_recompiler.h
+++ b/include/recompiler/live_recompiler.h
@ -0,0 +1,141 @@
+#ifndef __LIVE_RECOMPILER_H__
+#define __LIVE_RECOMPILER_H__
+
+#include <unordered_map>
+#include "recompiler/generator.h"
+#include "recomp.h"
+
+struct sljit_compiler;
+
+namespace N64Recomp {
+    struct LiveGeneratorContext;
+    // Identifies the target of a jump to a reference symbol as a section plus an offset
+    // (presumably an offset within that section — TODO confirm).
+    struct ReferenceJumpDetails {
+        uint16_t section;
+        uint32_t section_offset;
+    };
+    // Result of a live recompilation: the recompiled code together with the storage it refers to
+    // (string literals, jump tables) and the jumps that still have to be filled in afterwards.
+    // Move-only. A default-constructed or moved-from object has good == false, a null code pointer
+    // and a code size of zero. The destructor is defined out of line.
+    struct LiveGeneratorOutput {
+        LiveGeneratorOutput() = default;
+        LiveGeneratorOutput(const LiveGeneratorOutput& rhs) = delete;
+        LiveGeneratorOutput(LiveGeneratorOutput&& rhs) noexcept { *this = std::move(rhs); }
+        LiveGeneratorOutput& operator=(const LiveGeneratorOutput& rhs) = delete;
+        LiveGeneratorOutput& operator=(LiveGeneratorOutput&& rhs) noexcept {
+            // Self-move must be a no-op, otherwise the resets below would discard our own code pointer.
+            if (this == &rhs) {
+                return *this;
+            }
+
+            // NOTE(review): code already held by *this is overwritten here without being released;
+            // how it is released is only known to the out-of-line destructor — confirm that no
+            // caller move-assigns into an object that already holds code.
+            good = rhs.good;
+            string_literals = std::move(rhs.string_literals);
+            jump_tables = std::move(rhs.jump_tables);
+            code = rhs.code;
+            code_size = rhs.code_size;
+            functions = std::move(rhs.functions);
+            reference_symbol_jumps = std::move(rhs.reference_symbol_jumps);
+            import_jumps_by_index = std::move(rhs.import_jumps_by_index);
+            executable_offset = rhs.executable_offset;
+
+            // Leave the source in the same state as a default-constructed object.
+            rhs.good = false;
+            rhs.code = nullptr;
+            rhs.code_size = 0;
+            rhs.reference_symbol_jumps.clear();
+            rhs.import_jumps_by_index.clear();
+            rhs.executable_offset = 0;
+
+            return *this;
+        }
+        ~LiveGeneratorOutput();
+        size_t num_reference_symbol_jumps() const;
+        void set_reference_symbol_jump(size_t jump_index, recomp_func_t* func);
+        ReferenceJumpDetails get_reference_symbol_jump_details(size_t jump_index);
+        void populate_import_symbol_jumps(size_t import_index, recomp_func_t* func);
+        bool good = false;
+        // Storage for string literals referenced by recompiled code. These are allocated as unique_ptr arrays
+        // to prevent them from moving, as the referenced address is baked into the recompiled code.
+        std::vector<std::unique_ptr<char[]>> string_literals;
+        // Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These are also
+        // allocated as unique_ptr arrays for the same reason as strings.
+        std::vector<std::unique_ptr<void*[]>> jump_tables;
+        // Recompiled code. Null when no code is held.
+        void* code = nullptr;
+        // Size of the recompiled code.
+        size_t code_size = 0;
+        // Pointers to each individual function within the recompiled code.
+        std::vector<recomp_func_t*> functions;
+    private:
+        // List of jump details and the corresponding jump instruction address. These jumps get populated after recompilation is complete
+        // during dependency resolution.
+        std::vector<std::pair<ReferenceJumpDetails, void*>> reference_symbol_jumps;
+        // Mapping of import symbol index to any jumps to that import symbol.
+        std::unordered_multimap<size_t, void*> import_jumps_by_index;
+        // sljit executable offset.
+        int64_t executable_offset = 0;
+
+        friend class LiveGenerator;
+    };
+    // Host-provided values and callbacks supplied to a LiveGenerator at construction.
+    // Most of the function pointers have the same signatures as the extern "C" runtime functions
+    // declared in recomp.h (cop0_status_write, cop0_status_read, switch_error, do_break,
+    // get_function, recomp_syscall_handler, pause_self).
+    struct LiveGeneratorInputs {
+        // Presumably the index of this mod's first event within a global event list — TODO confirm.
+        uint32_t base_event_index;
+        void (*cop0_status_write)(recomp_context* ctx, gpr value);
+        gpr (*cop0_status_read)(recomp_context* ctx);
+        void (*switch_error)(const char* func, uint32_t vram, uint32_t jtbl);
+        void (*do_break)(uint32_t vram);
+        recomp_func_t* (*get_function)(int32_t vram);
+        void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram);
+        void (*pause_self)(uint8_t* rdram);
+        void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index);
+        // Section address tables (presumably indexed by section index, like section_addresses in
+        // recomp.h — TODO confirm). Stored as raw pointers, so they are not owned by this struct.
+        int32_t *reference_section_addresses;
+        int32_t *local_section_addresses;
+    };
+    // Generator implementation for live recompilation. It is built around an sljit_compiler and
+    // hands back its result as a LiveGeneratorOutput from finish().
+    // Instances can be neither copied nor moved.
+    class LiveGenerator final : public Generator {
+    public:
+        LiveGenerator(size_t num_funcs, const LiveGeneratorInputs& inputs);
+        ~LiveGenerator();
+        // Prevent moving or copying.
+        LiveGenerator(const LiveGenerator& rhs) = delete;
+        LiveGenerator(LiveGenerator&& rhs) = delete;
+        LiveGenerator& operator=(const LiveGenerator& rhs) = delete;
+        LiveGenerator& operator=(LiveGenerator&& rhs) = delete;
+
+        // Produces the output of everything emitted so far (presumably to be called once, after all
+        // functions have been recompiled — TODO confirm).
+        LiveGeneratorOutput finish();
+        void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const final;
+        void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const final;
+        void process_store_op(const StoreOp& op, const InstructionContext& ctx) const final;
+        void emit_function_start(const std::string& function_name, size_t func_index) const final;
+        void emit_function_end() const final;
+        void emit_function_call_lookup(uint32_t addr) const final;
+        void emit_function_call_by_register(int reg) const final;
+        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
+        void emit_function_call(const Context& context, size_t function_index) const final;
+        void emit_named_function_call(const std::string& function_name) const final;
+        void emit_goto(const std::string& target) const final;
+        void emit_label(const std::string& label_name) const final;
+        void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const final;
+        void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
+        void emit_branch_close() const final;
+        void emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const final;
+        void emit_case(int case_index, const std::string& target_label) const final;
+        void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const final;
+        void emit_switch_close() const final;
+        void emit_return(const Context& context) const final;
+        void emit_check_fr(int fpr) const final;
+        void emit_check_nan(int fpr, bool is_double) const final;
+        void emit_cop0_status_read(int reg) const final;
+        void emit_cop0_status_write(int reg) const final;
+        void emit_cop1_cs_read(int reg) const final;
+        void emit_cop1_cs_write(int reg) const final;
+        void emit_muldiv(InstrId instr_id, int reg1, int reg2) const final;
+        void emit_syscall(uint32_t instr_vram) const final;
+        void emit_do_break(uint32_t instr_vram) const final;
+        void emit_pause_self() const final;
+        void emit_trigger_event(uint32_t event_index) const final;
+        void emit_comment(const std::string& comment) const final;
+    private:
+        // NOTE(review): these three declarations have the same signatures as CGenerator's private
+        // string-building helpers; whether this class defines or uses them is not visible here.
+        void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
+        void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
+        void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
+        // Loads the relocated address specified by the instruction context into the target register.
+        void load_relocated_address(const InstructionContext& ctx, int reg) const;
+        sljit_compiler* compiler;
+        // Copy of the inputs passed to the constructor.
+        LiveGeneratorInputs inputs;
+        // Both members below are mutable because every Generator callback is a const member function.
+        mutable std::unique_ptr<LiveGeneratorContext> context;
+        mutable bool errored;
+    };
+
+    void live_recompiler_init();
+    bool recompile_function_live(LiveGenerator& generator, const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs);
+}
+
+#endif
--- a/include/recompiler/operations.h
+++ b/include/recompiler/operations.h
@ -28,13 +28,12 @@ namespace N64Recomp {
        ToU32,
        ToS64,
        ToU64,
-        NegateS32,
-        NegateS64,
        Lui,
        Mask5, // Mask to 5 bits
        Mask6, // Mask to 5 bits
        ToInt32, // Functionally equivalent to ToS32, only exists for parity with old codegen
-        Negate,
+        NegateFloat,
+        NegateDouble,
        AbsFloat,
        AbsDouble,
        SqrtFloat,
@ -51,12 +50,20 @@ namespace N64Recomp {
        ConvertLFromS,
        TruncateWFromS,
        TruncateWFromD,
+        TruncateLFromS,
+        TruncateLFromD,
        RoundWFromS,
        RoundWFromD,
+        RoundLFromS,
+        RoundLFromD,
        CeilWFromS,
        CeilWFromD,
+        CeilLFromS,
+        CeilLFromD,
        FloorWFromS,
-        FloorWFromD
+        FloorWFromD,
+        FloorLFromS,
+        FloorLFromD
    };

    enum class BinaryOpType {
@ -92,6 +99,12 @@ namespace N64Recomp {
        LessEq,
        Greater,
        GreaterEq,
+        EqualFloat,
+        LessFloat,
+        LessEqFloat,
+        EqualDouble,
+        LessDouble,
+        LessEqDouble,
        // Loads
        LD,
        LW,