mirror of
https://github.com/N64Recomp/N64Recomp.git
synced 2025-05-25 21:03:17 +00:00
Implement live recompiler (#114)
This commit implements the "live recompiler", which is another backend for the recompiler that generates platform-specific assembly at runtime. This is still static recompilation as opposed to dynamic recompilation, as it still requires information about the binary to recompile and leverages the same static analysis that the C recompiler uses. However, similarly to dynamic recompilation it's aimed at recompiling binaries at runtime, mainly for modding purposes. The live recompiler leverages a library called sljit to generate platform-specific code. This library provides an API that's implemented on several platforms, including the main targets of this component: x86_64 and ARM64. Performance is expected to be slower than the C recompiler, but should still be plenty fast enough for running large amounts of recompiled code without an issue. Considering these ROMs can often be run through an interpreter and still hit their full speed, performance should not be a concern for running native code even if it's less optimal than the C recompiler's codegen. As mentioned earlier, the main use of the live recompiler will be for loading mods in the N64Recomp runtime. This makes it so that modders don't need to ship platform-specific binaries for their mods, and allows fixing bugs with recompilation down the line without requiring modders to update their binaries. This PR also includes a utility for testing the live recompiler. It accepts binaries in a custom format which contain the instructions, input data, and target data. Documentation for the test format as well as most of the tests that were used to validate the live recompiler can be found here. The few remaining tests were hacked together binaries that I put together very hastily, so they need to be cleaned up and will probably be uploaded at a later date. The only test in that suite that doesn't currently succeed is the div test, due to unknown behavior when the two operands aren't properly sign extended to 64 bits. 
This has no bearing on practical usage, since the inputs will always be sign extended as expected.
This commit is contained in:
parent
0d0e93e979
commit
66062a06e9
24 changed files with 3452 additions and 385 deletions
|
@ -1,56 +0,0 @@
|
|||
#ifndef __GENERATOR_H__
#define __GENERATOR_H__

#include "n64recomp.h"
#include "operations.h"

namespace N64Recomp {
    // Operands and relocation info for a single MIPS instruction, passed to
    // the Generator callbacks while that instruction is being recompiled.
    struct InstructionContext {
        // General purpose register operand indices.
        int rd;
        int rs;
        int rt;
        // Shift amount.
        int sa;

        // Floating point register operand indices.
        int fd;
        int fs;
        int ft;

        // COP1 control/status register index.
        int cop1_cs;

        // 16-bit immediate operand.
        uint16_t imm16;

        // Relocation attached to this instruction (if any).
        bool reloc_tag_as_reference;
        RelocType reloc_type;
        uint32_t reloc_section_index;
        uint32_t reloc_target_section_offset;
    };

    // Abstract interface for recompiler output backends. Each callback emits
    // the output for one operation into the provided stream.
    // NOTE(review): no virtual destructor — deleting a derived generator
    // through a Generator* would be undefined behavior; confirm generators
    // are never owned polymorphically.
    class Generator {
    public:
        virtual void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const = 0;
        virtual void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const = 0;
        virtual void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const = 0;
        virtual void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0;
        virtual void emit_branch_close(std::ostream& output_file) const = 0;
        virtual void emit_check_fr(std::ostream& output_file, int fpr) const = 0;
        virtual void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const = 0;
    };

    // Backend that emits C source code.
    // NOTE(review): inherits privately (class default), so conversion to
    // Generator& is ill-formed outside the class — confirm this is intended.
    class CGenerator final : Generator {
    public:
        CGenerator() = default;
        void process_binary_op(std::ostream& output_file, const BinaryOp& op, const InstructionContext& ctx) const final;
        void process_unary_op(std::ostream& output_file, const UnaryOp& op, const InstructionContext& ctx) const final;
        void process_store_op(std::ostream& output_file, const StoreOp& op, const InstructionContext& ctx) const final;
        void emit_branch_condition(std::ostream& output_file, const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
        void emit_branch_close(std::ostream& output_file) const final;
        void emit_check_fr(std::ostream& output_file, int fpr) const final;
        void emit_check_nan(std::ostream& output_file, int fpr, bool is_double) const final;
    private:
        // Renders a single operand (with any unary operation applied) into operand_string.
        void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
        // Builds the full C expression for a binary operation into expr_string.
        void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
        // Retrieves the function-call or infix notation used to print a binary operation.
        void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
    };
}

#endif
|
397
include/recomp.h
Normal file
397
include/recomp.h
Normal file
|
@ -0,0 +1,397 @@
|
|||
#ifndef __RECOMP_H__
|
||||
#define __RECOMP_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <fenv.h>
|
||||
#include <assert.h>
|
||||
|
||||
// Compiler definition to disable inter-procedural optimization, allowing multiple functions to be in a single file without breaking interposition.
// Every recompiled function is declared RECOMP_FUNC; SET_FENV_ACCESS is placed in function bodies
// that need the compiler to respect the runtime floating point environment.
#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
    // MSVC's __declspec(noinline) seems to disable inter-procedural optimization entirely, so it's all that's needed.
    #define RECOMP_FUNC __declspec(noinline)

    // Use MSVC's fenv_access pragma.
    #define SET_FENV_ACCESS() _Pragma("fenv_access(on)")
#elif defined(__clang__)
    // Clang has no dedicated IPO attribute, so we use a combination of other attributes to give the desired behavior.
    // The inline keyword allows multiple definitions during linking, and extern forces clang to emit an externally visible definition.
    // Weak forces Clang to not perform any IPO as the symbol can be interposed, which prevents actual inlining due to the inline keyword.
    // Add noinline on for good measure, which doesn't conflict with the inline keyword as they have different meanings.
    #define RECOMP_FUNC extern inline __attribute__((weak,noinline))

    // Use the standard STDC FENV_ACCESS pragma.
    #define SET_FENV_ACCESS() _Pragma("STDC FENV_ACCESS ON")
#elif defined(__GNUC__) && !defined(__INTEL_COMPILER)
    // Use GCC's attribute for disabling inter-procedural optimizations. Also enable the rounding-math compiler flag to disable
    // constant folding so that arithmetic respects the floating point environment. This is needed because gcc doesn't implement
    // any FENV_ACCESS pragma.
    #define RECOMP_FUNC __attribute__((noipa, optimize("rounding-math")))

    // There's no FENV_ACCESS pragma in gcc, so this can be empty.
    #define SET_FENV_ACCESS()
#else
    #error "No RECOMP_FUNC definition for this compiler"
#endif
|
||||
|
||||
// Implementation of 64-bit multiply and divide instructions
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
|
||||
// Signed 64x64 -> 128-bit multiply, as performed by the MIPS DMULT instruction.
// Writes the low 64 bits of the product to lo64 and the high 64 bits to hi64.
static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) {
    __int128 product = (__int128)a * (__int128)b;

    *lo64 = (int64_t)product;
    *hi64 = (int64_t)(product >> 64);
}
|
||||
|
||||
// Unsigned 64x64 -> 128-bit multiply, as performed by the MIPS DMULTU instruction.
// Writes the low 64 bits of the product to lo64 and the high 64 bits to hi64.
static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) {
    unsigned __int128 product = (unsigned __int128)a * (unsigned __int128)b;

    *lo64 = (uint64_t)product;
    *hi64 = (uint64_t)(product >> 64);
}
|
||||
|
||||
#elif defined(_MSC_VER)

// MSVC has no __int128, so use its 128-bit multiply intrinsics instead.
#include <intrin.h>
#pragma intrinsic(_mul128)
#pragma intrinsic(_umul128)

// Signed 64x64 -> 128-bit multiply (MIPS DMULT) via the MSVC intrinsic.
static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) {
    *lo64 = _mul128(a, b, hi64);
}

// Unsigned 64x64 -> 128-bit multiply (MIPS DMULTU) via the MSVC intrinsic.
static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) {
    *lo64 = _umul128(a, b, hi64);
}

#else
#error "128-bit integer type not found"
#endif
|
||||
|
||||
// 64-bit signed divide, as performed by the MIPS DDIV instruction.
// Writes the quotient to quot and the remainder to rem.
// INT64_MIN / -1 overflows (UB in C) while the hardware just produces
// INT64_MIN with remainder 0, so it's special cased.
// A zero divisor is likewise UB in C while the hardware produces an
// unpredictable result without trapping, so it's guarded to avoid a
// host-side crash.
static inline void DDIV(int64_t a, int64_t b, int64_t* quot, int64_t* rem) {
    if (b == 0) {
        // NOTE(review): the result of a MIPS divide by zero is architecturally
        // undefined; these values follow the common emulator convention.
        *quot = (a < 0) ? 1 : -1;
        *rem = a;
        return;
    }
    int overflow = ((uint64_t)a == 0x8000000000000000ull) && (b == -1ll);
    *quot = overflow ? a : (a / b);
    *rem = overflow ? 0 : (a % b);
}
|
||||
|
||||
// 64-bit unsigned divide, as performed by the MIPS DDIVU instruction.
// A zero divisor is UB in C while the hardware produces an unpredictable
// result without trapping, so it's guarded to avoid a host-side crash.
// NOTE(review): the values used for the zero-divisor case (all-ones quotient,
// remainder = dividend) follow the common emulator convention.
static inline void DDIVU(uint64_t a, uint64_t b, uint64_t* quot, uint64_t* rem) {
    if (b == 0) {
        *quot = ~0ull;
        *rem = a;
        return;
    }
    *quot = a / b;
    *rem = a % b;
}
|
||||
|
||||
// MIPS general purpose register value. Stored as 64 bits.
typedef uint64_t gpr;

// Reinterprets a register value as signed.
#define SIGNED(val) \
    ((int64_t)(val))

// 32-bit add/subtract; the 32-bit result is sign extended back to 64 bits,
// matching the MIPS word arithmetic instructions.
#define ADD32(a, b) \
    ((gpr)(int32_t)((a) + (b)))

#define SUB32(a, b) \
    ((gpr)(int32_t)((a) - (b)))

// Memory accessors. Addresses are sign-extended N64 virtual addresses, so
// subtracting 0xFFFFFFFF80000000 maps the direct-mapped segment onto the host
// rdram buffer. The ^ 2 / ^ 3 on halfword/byte accesses swap positions within
// a 32-bit word to account for rdram being stored as byteswapped words.
#define MEM_W(offset, reg) \
    (*(int32_t*)(rdram + ((((reg) + (offset))) - 0xFFFFFFFF80000000)))

#define MEM_H(offset, reg) \
    (*(int16_t*)(rdram + ((((reg) + (offset)) ^ 2) - 0xFFFFFFFF80000000)))

#define MEM_B(offset, reg) \
    (*(int8_t*)(rdram + ((((reg) + (offset)) ^ 3) - 0xFFFFFFFF80000000)))

#define MEM_HU(offset, reg) \
    (*(uint16_t*)(rdram + ((((reg) + (offset)) ^ 2) - 0xFFFFFFFF80000000)))

#define MEM_BU(offset, reg) \
    (*(uint8_t*)(rdram + ((((reg) + (offset)) ^ 3) - 0xFFFFFFFF80000000)))

// Stores a 64-bit value as two 32-bit words: the high word at the base
// address and the low word at base + 4 (big-endian doubleword order).
#define SD(val, offset, reg) { \
    *(uint32_t*)(rdram + ((((reg) + (offset) + 4)) - 0xFFFFFFFF80000000)) = (uint32_t)((gpr)(val) >> 0); \
    *(uint32_t*)(rdram + ((((reg) + (offset) + 0)) - 0xFFFFFFFF80000000)) = (uint32_t)((gpr)(val) >> 32); \
}
|
||||
|
||||
static inline uint64_t load_doubleword(uint8_t* rdram, gpr reg, gpr offset) {
|
||||
uint64_t ret = 0;
|
||||
uint64_t lo = (uint64_t)(uint32_t)MEM_W(reg, offset + 4);
|
||||
uint64_t hi = (uint64_t)(uint32_t)MEM_W(reg, offset + 0);
|
||||
ret = (lo << 0) | (hi << 32);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define LD(offset, reg) \
|
||||
load_doubleword(rdram, offset, reg)
|
||||
|
||||
// Implements the MIPS LWL (load word left) instruction: loads from a
// potentially unaligned address and merges the loaded bytes into the upper
// portion of the destination register's 32-bit value.
// initial_value is the destination register's current value; the merged
// 32-bit result is sign extended to 64 bits.
static inline gpr do_lwl(uint8_t* rdram, gpr initial_value, gpr offset, gpr reg) {
    // Calculate the overall address
    gpr address = (offset + reg);

    // Load the aligned word
    gpr word_address = address & ~0x3;
    uint32_t loaded_value = MEM_W(0, word_address);

    // Mask the existing value and shift the loaded value appropriately
    gpr misalignment = address & 0x3;
    gpr masked_value = initial_value & (gpr)(uint32_t)~(0xFFFFFFFFu << (misalignment * 8));
    loaded_value <<= (misalignment * 8);

    // Cast to int32_t to sign extend first
    return (gpr)(int32_t)(masked_value | loaded_value);
}
|
||||
|
||||
// Implements the MIPS LWR (load word right) instruction: the counterpart of
// do_lwl, merging the loaded bytes into the lower portion of the destination
// register's 32-bit value.
// initial_value is the destination register's current value; the merged
// 32-bit result is sign extended to 64 bits.
static inline gpr do_lwr(uint8_t* rdram, gpr initial_value, gpr offset, gpr reg) {
    // Calculate the overall address
    gpr address = (offset + reg);

    // Load the aligned word
    gpr word_address = address & ~0x3;
    uint32_t loaded_value = MEM_W(0, word_address);

    // Mask the existing value and shift the loaded value appropriately
    gpr misalignment = address & 0x3;
    gpr masked_value = initial_value & (gpr)(uint32_t)~(0xFFFFFFFFu >> (24 - misalignment * 8));
    loaded_value >>= (24 - misalignment * 8);

    // Cast to int32_t to sign extend first
    return (gpr)(int32_t)(masked_value | loaded_value);
}
|
||||
|
||||
// Implements the MIPS SWL (store word left) instruction: stores the upper
// bytes of val into a potentially unaligned word in memory, preserving the
// bytes of the word that precede the address.
static inline void do_swl(uint8_t* rdram, gpr offset, gpr reg, gpr val) {
    // Calculate the overall address
    gpr address = (offset + reg);

    // Get the initial value of the aligned word
    gpr word_address = address & ~0x3;
    uint32_t initial_value = MEM_W(0, word_address);

    // Mask the initial value and shift the input value appropriately
    gpr misalignment = address & 0x3;
    uint32_t masked_initial_value = initial_value & ~(0xFFFFFFFFu >> (misalignment * 8));
    uint32_t shifted_input_value = ((uint32_t)val) >> (misalignment * 8);
    MEM_W(0, word_address) = masked_initial_value | shifted_input_value;
}
|
||||
|
||||
// Implements the MIPS SWR (store word right) instruction: the counterpart of
// do_swl, storing the lower bytes of val into a potentially unaligned word in
// memory while preserving the bytes that follow the address.
static inline void do_swr(uint8_t* rdram, gpr offset, gpr reg, gpr val) {
    // Calculate the overall address
    gpr address = (offset + reg);

    // Get the initial value of the aligned word
    gpr word_address = address & ~0x3;
    uint32_t initial_value = MEM_W(0, word_address);

    // Mask the initial value and shift the input value appropriately
    gpr misalignment = address & 0x3;
    uint32_t masked_initial_value = initial_value & ~(0xFFFFFFFFu << (24 - misalignment * 8));
    uint32_t shifted_input_value = ((uint32_t)val) << (24 - misalignment * 8);
    MEM_W(0, word_address) = masked_initial_value | shifted_input_value;
}
|
||||
|
||||
// Builds the rounding-mode field (bits 0-1) of the COP1 control/status
// register from the host's current floating point rounding mode.
static inline uint32_t get_cop1_cs() {
    switch (fegetround()) {
        case FE_TOWARDZERO: // round to zero (truncate)
            return 1;
        case FE_UPWARD:     // round to positive infinity (ceil)
            return 2;
        case FE_DOWNWARD:   // round to negative infinity (floor)
            return 3;
        case FE_TONEAREST:  // round to nearest value
        default:
            return 0;
    }
}
|
||||
|
||||
// Applies the rounding-mode field (bits 0-1) of a COP1 control/status value
// to the host floating point environment. All other bits are ignored.
static inline void set_cop1_cs(uint32_t val) {
    static const int rounding_modes[4] = {
        FE_TONEAREST,  // 0: round to nearest value
        FE_TOWARDZERO, // 1: round to zero (truncate)
        FE_UPWARD,     // 2: round to positive infinity (ceil)
        FE_DOWNWARD    // 3: round to negative infinity (floor)
    };
    fesetround(rounding_modes[val & 0x3]);
}
|
||||
|
||||
// Integer truncation/extension helpers used by recompiled code.
#define S32(val) \
    ((int32_t)(val))

#define U32(val) \
    ((uint32_t)(val))

#define S64(val) \
    ((int64_t)(val))

#define U64(val) \
    ((uint64_t)(val))

// Floating point arithmetic (MIPS MUL.S/MUL.D/DIV.S/DIV.D).
#define MUL_S(val1, val2) \
    ((val1) * (val2))

#define MUL_D(val1, val2) \
    ((val1) * (val2))

#define DIV_S(val1, val2) \
    ((val1) / (val2))

#define DIV_D(val1, val2) \
    ((val1) / (val2))

// Integer to floating point conversions (MIPS CVT instructions).
#define CVT_S_W(val) \
    ((float)((int32_t)(val)))

#define CVT_D_W(val) \
    ((double)((int32_t)(val)))

#define CVT_D_L(val) \
    ((double)((int64_t)(val)))

#define CVT_S_L(val) \
    ((float)((int64_t)(val)))

// Float <-> double conversions.
#define CVT_D_S(val) \
    ((double)(val))

#define CVT_S_D(val) \
    ((float)(val))

// Truncating (round toward zero) float to int conversions; C's float-to-int
// cast truncates, matching the MIPS TRUNC instructions.
#define TRUNC_W_S(val) \
    ((int32_t)(val))

#define TRUNC_W_D(val) \
    ((int32_t)(val))

#define TRUNC_L_S(val) \
    ((int64_t)(val))

#define TRUNC_L_D(val) \
    ((int64_t)(val))

// Rounding mode field value for round-to-nearest (see get_cop1_cs/set_cop1_cs).
#define DEFAULT_ROUNDING_MODE 0
|
||||
|
||||
// Rounding mode aware float to 32-bit int conversion (MIPS CVT.W.S).
static inline int32_t do_cvt_w_s(float val) {
    // lrintf rounds according to the current floating point environment.
    long rounded = lrintf(val);
    return (int32_t)rounded;
}

#define CVT_W_S(val) \
    do_cvt_w_s(val)
|
||||
|
||||
// Rounding mode aware float to 64-bit int conversion (MIPS CVT.L.S).
static inline int64_t do_cvt_l_s(float val) {
    // llrintf rounds according to the current floating point environment.
    return (int64_t)llrintf(val);
}

// Fix: the macro previously expanded with a trailing semicolon
// (`do_cvt_l_s(val);`), unlike its siblings CVT_W_S/CVT_W_D/CVT_L_D, which
// made it unusable inside larger expressions.
#define CVT_L_S(val) \
    do_cvt_l_s(val)
|
||||
|
||||
// Rounding mode aware double to 32-bit int conversion (MIPS CVT.W.D).
static inline int32_t do_cvt_w_d(double val) {
    // lrint rounds according to the current floating point environment.
    long rounded = lrint(val);
    return (int32_t)rounded;
}

#define CVT_W_D(val) \
    do_cvt_w_d(val)
|
||||
|
||||
// Rounding mode aware double to 64-bit int conversion (MIPS CVT.L.D).
static inline int64_t do_cvt_l_d(double val) {
    // llrint rounds according to the current floating point environment.
    long long rounded = llrint(val);
    return (int64_t)rounded;
}

#define CVT_L_D(val) \
    do_cvt_l_d(val)
|
||||
|
||||
// Asserts that val is not NaN (a NaN compares unequal to itself).
// Swap in the empty definition below to disable the check.
#define NAN_CHECK(val) \
    assert(val == val)

//#define NAN_CHECK(val)
||||
// Floating point register. Overlays the views used by the various COP1
// instructions: one double, two floats (low/high), two 32-bit words
// (low/high), or one 64-bit word.
// NOTE(review): reading a member other than the last one written is type
// punning through a union — valid in C but technically UB in C++; anonymous
// structs are also a compiler extension in C++. Confirm the supported
// compilers handle both as intended.
typedef union {
    double d;
    struct {
        float fl;
        float fh;
    };
    struct {
        uint32_t u32l;
        uint32_t u32h;
    };
    uint64_t u64;
} fpr;
|
||||
|
||||
// Guest CPU state operated on by recompiled code.
typedef struct {
    // The 32 general purpose registers.
    gpr r0, r1, r2, r3, r4, r5, r6, r7,
        r8, r9, r10, r11, r12, r13, r14, r15,
        r16, r17, r18, r19, r20, r21, r22, r23,
        r24, r25, r26, r27, r28, r29, r30, r31;
    // The 32 floating point registers.
    fpr f0, f1, f2, f3, f4, f5, f6, f7,
        f8, f9, f10, f11, f12, f13, f14, f15,
        f16, f17, f18, f19, f20, f21, f22, f23,
        f24, f25, f26, f27, f28, f29, f30, f31;
    // Multiply/divide result registers.
    uint64_t hi, lo;
    // NOTE(review): presumably backing storage for the odd-numbered single
    // precision registers when mips3_float_mode is off — confirm against the
    // runtime's float register handling.
    uint32_t* f_odd;
    // Status register value — presumably the COP0 status accessed via
    // cop0_status_read/cop0_status_write; confirm.
    uint32_t status_reg;
    // Nonzero when MIPS III float mode is active, which permits odd float
    // register indices (see CHECK_FR).
    uint8_t mips3_float_mode;
} recomp_context;
|
||||
|
||||
// Checks if the target is an even float register or that mips3 float mode is enabled
#define CHECK_FR(ctx, idx) \
    assert(((idx) & 1) == 0 || (ctx)->mips3_float_mode)

#ifdef __cplusplus
extern "C" {
#endif

// COP0 status register accessors, implemented by the runtime.
void cop0_status_write(recomp_context* ctx, gpr value);
gpr cop0_status_read(recomp_context* ctx);
// Reports a failed jump table lookup in the named function.
void switch_error(const char* func, uint32_t vram, uint32_t jtbl);
// Handler for the MIPS break instruction at the given address.
void do_break(uint32_t vram);

// Signature shared by every recompiled function.
typedef void (recomp_func_t)(uint8_t* rdram, recomp_context* ctx);

// Looks up the recompiled function for a guest virtual address.
recomp_func_t* get_function(int32_t vram);

#define LOOKUP_FUNC(val) \
    get_function((int32_t)(val))

// Base addresses of the recompiled binary's sections, indexed by section.
extern int32_t* section_addresses;

// Relocation helpers. HI16 adds the carry bit that compensates for the sign
// extension of the LO16 half when the two halves are combined.
#define LO16(x) \
    ((x) & 0xFFFF)

#define HI16(x) \
    (((x) >> 16) + (((x) >> 15) & 1))

#define RELOC_HI16(section_index, offset) \
    HI16(section_addresses[section_index] + (offset))

#define RELOC_LO16(section_index, offset) \
    LO16(section_addresses[section_index] + (offset))

// Handler for the MIPS syscall instruction.
void recomp_syscall_handler(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram);

// Implemented by the runtime; presumably suspends the calling guest thread — confirm.
void pause_self(uint8_t *rdram);

#ifdef __cplusplus
}
#endif

#endif
|
|
@ -36,6 +36,20 @@ namespace N64Recomp {
|
|||
: vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {}
|
||||
Function() = default;
|
||||
};
|
||||
|
||||
// A jump table identified during analysis. The *_vram fields record the
// addresses of the instructions making up the indirect-jump sequence that
// uses the table (meanings inferred from the instruction names — confirm).
struct JumpTable {
    uint32_t vram;                 // Address of the jump table itself.
    uint32_t addend_reg;           // Register holding the index added to the table address.
    uint32_t rom;                  // ROM offset of the table's data.
    uint32_t lw_vram;              // Address of the lw that loads a table entry.
    uint32_t addu_vram;            // Address of the addu that computes the entry address.
    uint32_t jr_vram;              // Address of the jr that performs the jump.
    uint16_t section_index;        // Section the table belongs to.
    std::vector<uint32_t> entries; // The jump targets stored in the table.

    JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, uint16_t section_index, std::vector<uint32_t>&& entries)
        : vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), section_index(section_index), entries(std::move(entries)) {}
};
|
||||
|
||||
enum class RelocType : uint8_t {
|
||||
R_MIPS_NONE = 0,
|
||||
|
@ -175,6 +189,8 @@ namespace N64Recomp {
|
|||
std::vector<ReferenceSymbol> reference_symbols;
|
||||
// Mapping of symbol name to reference symbol index.
|
||||
std::unordered_map<std::string, SymbolReference> reference_symbols_by_name;
|
||||
// Whether all reference sections should be treated as relocatable (used in live recompilation).
|
||||
bool all_reference_sections_relocatable = false;
|
||||
public:
|
||||
std::vector<Section> sections;
|
||||
std::vector<Function> functions;
|
||||
|
@ -187,6 +203,8 @@ namespace N64Recomp {
|
|||
// The target ROM being recompiled, TODO move this outside of the context to avoid making a copy for mod contexts.
|
||||
// Used for reading relocations and for the output binary feature.
|
||||
std::vector<uint8_t> rom;
|
||||
// Whether reference symbols should be validated when emitting function calls during recompilation.
|
||||
bool skip_validating_reference_symbols = true;
|
||||
|
||||
//// Only used by the CLI, TODO move this to a struct in the internal headers.
|
||||
// A mapping of function name to index in the functions vector
|
||||
|
@ -359,6 +377,9 @@ namespace N64Recomp {
|
|||
}
|
||||
|
||||
bool is_reference_section_relocatable(uint16_t section_index) const {
|
||||
if (all_reference_sections_relocatable) {
|
||||
return true;
|
||||
}
|
||||
if (section_index == SectionAbsolute) {
|
||||
return false;
|
||||
}
|
||||
|
@ -518,9 +539,15 @@ namespace N64Recomp {
|
|||
// Copies the reference sections (but not the reference symbols) from another context.
void copy_reference_sections_from(const Context& rhs) {
    reference_sections = rhs.reference_sections;
}
|
||||
|
||||
// Marks every reference section as relocatable (used in live recompilation).
void set_all_reference_sections_relocatable() {
    all_reference_sections_relocatable = true;
}
|
||||
};
|
||||
|
||||
bool recompile_function(const Context& context, const Function& func, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs, bool tag_reference_relocs);
|
||||
class Generator;
|
||||
bool recompile_function(const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs, bool tag_reference_relocs);
|
||||
bool recompile_function_custom(Generator& generator, const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs);
|
||||
|
||||
enum class ModSymbolsError {
|
||||
Good,
|
109
include/recompiler/generator.h
Normal file
109
include/recompiler/generator.h
Normal file
|
@ -0,0 +1,109 @@
|
|||
#ifndef __GENERATOR_H__
#define __GENERATOR_H__

#include "recompiler/context.h"
#include "operations.h"

namespace N64Recomp {
    // Operands and relocation info for a single MIPS instruction, passed to
    // the Generator callbacks while that instruction is being recompiled.
    struct InstructionContext {
        // General purpose register operand indices.
        int rd;
        int rs;
        int rt;
        // Shift amount.
        int sa;

        // Floating point register operand indices.
        int fd;
        int fs;
        int ft;

        // COP1 control/status register index.
        int cop1_cs;

        // 16-bit immediate operand.
        uint16_t imm16;

        // Relocation attached to this instruction (if any).
        bool reloc_tag_as_reference;
        RelocType reloc_type;
        uint32_t reloc_section_index;
        uint32_t reloc_target_section_offset;
    };

    // Abstract interface for recompiler output backends (C source output and
    // the live recompiler). Each callback emits the output for one operation
    // or structural element of the recompiled function.
    // NOTE(review): no virtual destructor — deleting a derived generator
    // through a Generator* would be undefined behavior; confirm generators
    // are never owned polymorphically.
    class Generator {
    public:
        // Instruction-level operations.
        virtual void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const = 0;
        virtual void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const = 0;
        virtual void process_store_op(const StoreOp& op, const InstructionContext& ctx) const = 0;
        // Function prologue/epilogue.
        virtual void emit_function_start(const std::string& function_name, size_t func_index) const = 0;
        virtual void emit_function_end() const = 0;
        // Function calls.
        virtual void emit_function_call_lookup(uint32_t addr) const = 0;
        virtual void emit_function_call_by_register(int reg) const = 0;
        // target_section_offset can each be deduced from symbol_index if the full context is available,
        // but for live recompilation the reference symbol list is unavailable so it's still provided.
        virtual void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const = 0;
        virtual void emit_function_call(const Context& context, size_t function_index) const = 0;
        virtual void emit_named_function_call(const std::string& function_name) const = 0;
        // Control flow within a function.
        virtual void emit_goto(const std::string& target) const = 0;
        virtual void emit_label(const std::string& label_name) const = 0;
        virtual void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const = 0;
        virtual void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const = 0;
        virtual void emit_branch_close() const = 0;
        // Jump table switches.
        virtual void emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const = 0;
        virtual void emit_case(int case_index, const std::string& target_label) const = 0;
        virtual void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const = 0;
        virtual void emit_switch_close() const = 0;
        virtual void emit_return() const = 0;
        // Runtime checks and coprocessor state accesses.
        virtual void emit_check_fr(int fpr) const = 0;
        virtual void emit_check_nan(int fpr, bool is_double) const = 0;
        virtual void emit_cop0_status_read(int reg) const = 0;
        virtual void emit_cop0_status_write(int reg) const = 0;
        virtual void emit_cop1_cs_read(int reg) const = 0;
        virtual void emit_cop1_cs_write(int reg) const = 0;
        virtual void emit_muldiv(InstrId instr_id, int reg1, int reg2) const = 0;
        // Runtime callouts.
        virtual void emit_syscall(uint32_t instr_vram) const = 0;
        virtual void emit_do_break(uint32_t instr_vram) const = 0;
        virtual void emit_pause_self() const = 0;
        virtual void emit_trigger_event(uint32_t event_index) const = 0;
        virtual void emit_comment(const std::string& comment) const = 0;
    };

    // Backend that emits C source code into the provided stream.
    // NOTE(review): inherits privately (class default) while LiveGenerator
    // derives publicly — conversion to Generator& is ill-formed outside this
    // class; confirm this is intended.
    class CGenerator final : Generator {
    public:
        CGenerator(std::ostream& output_file) : output_file(output_file) {};
        void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const final;
        void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const final;
        void process_store_op(const StoreOp& op, const InstructionContext& ctx) const final;
        void emit_function_start(const std::string& function_name, size_t func_index) const final;
        void emit_function_end() const final;
        void emit_function_call_lookup(uint32_t addr) const final;
        void emit_function_call_by_register(int reg) const final;
        void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
        void emit_function_call(const Context& context, size_t function_index) const final;
        void emit_named_function_call(const std::string& function_name) const final;
        void emit_goto(const std::string& target) const final;
        void emit_label(const std::string& label_name) const final;
        void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const final;
        void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
        void emit_branch_close() const final;
        void emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const final;
        void emit_case(int case_index, const std::string& target_label) const final;
        void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const final;
        void emit_switch_close() const final;
        void emit_return() const final;
        void emit_check_fr(int fpr) const final;
        void emit_check_nan(int fpr, bool is_double) const final;
        void emit_cop0_status_read(int reg) const final;
        void emit_cop0_status_write(int reg) const final;
        void emit_cop1_cs_read(int reg) const final;
        void emit_cop1_cs_write(int reg) const final;
        void emit_muldiv(InstrId instr_id, int reg1, int reg2) const final;
        void emit_syscall(uint32_t instr_vram) const final;
        void emit_do_break(uint32_t instr_vram) const final;
        void emit_pause_self() const final;
        void emit_trigger_event(uint32_t event_index) const final;
        void emit_comment(const std::string& comment) const final;
    private:
        // Renders a single operand (with any unary operation applied) into operand_string.
        void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
        // Builds the full C expression for a binary operation into expr_string.
        void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
        // Retrieves the function-call or infix notation used to print a binary operation.
        void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
        // Stream the generated C source is written to.
        std::ostream& output_file;
    };
}

#endif
|
141
include/recompiler/live_recompiler.h
Normal file
141
include/recompiler/live_recompiler.h
Normal file
|
@ -0,0 +1,141 @@
|
|||
#ifndef __LIVE_RECOMPILER_H__
|
||||
#define __LIVE_RECOMPILER_H__
|
||||
|
||||
#include <unordered_map>
|
||||
#include "recompiler/generator.h"
|
||||
#include "recomp.h"
|
||||
|
||||
struct sljit_compiler;
|
||||
|
||||
namespace N64Recomp {
|
||||
struct LiveGeneratorContext;
|
||||
// Identifies the target of a jump to a reference symbol: the reference
// section it lives in and the offset within that section.
struct ReferenceJumpDetails {
    uint16_t section;
    uint32_t section_offset;
};
|
||||
struct LiveGeneratorOutput {
|
||||
LiveGeneratorOutput() = default;
|
||||
LiveGeneratorOutput(const LiveGeneratorOutput& rhs) = delete;
|
||||
LiveGeneratorOutput(LiveGeneratorOutput&& rhs) { *this = std::move(rhs); }
|
||||
LiveGeneratorOutput& operator=(const LiveGeneratorOutput& rhs) = delete;
|
||||
LiveGeneratorOutput& operator=(LiveGeneratorOutput&& rhs) {
|
||||
good = rhs.good;
|
||||
string_literals = std::move(rhs.string_literals);
|
||||
jump_tables = std::move(rhs.jump_tables);
|
||||
code = rhs.code;
|
||||
code_size = rhs.code_size;
|
||||
functions = std::move(rhs.functions);
|
||||
reference_symbol_jumps = std::move(rhs.reference_symbol_jumps);
|
||||
import_jumps_by_index = std::move(rhs.import_jumps_by_index);
|
||||
executable_offset = rhs.executable_offset;
|
||||
|
||||
rhs.good = false;
|
||||
rhs.code = nullptr;
|
||||
rhs.code_size = 0;
|
||||
rhs.reference_symbol_jumps.clear();
|
||||
rhs.executable_offset = 0;
|
||||
|
||||
return *this;
|
||||
}
|
||||
~LiveGeneratorOutput();
|
||||
size_t num_reference_symbol_jumps() const;
|
||||
void set_reference_symbol_jump(size_t jump_index, recomp_func_t* func);
|
||||
ReferenceJumpDetails get_reference_symbol_jump_details(size_t jump_index);
|
||||
void populate_import_symbol_jumps(size_t import_index, recomp_func_t* func);
|
||||
bool good = false;
|
||||
// Storage for string literals referenced by recompiled code. These are allocated as unique_ptr arrays
|
||||
// to prevent them from moving, as the referenced address is baked into the recompiled code.
|
||||
std::vector<std::unique_ptr<char[]>> string_literals;
|
||||
// Storage for jump tables referenced by recompiled code (vector of arrays of pointers). These are also
|
||||
// allocated as unique_ptr arrays for the same reason as strings.
|
||||
std::vector<std::unique_ptr<void*[]>> jump_tables;
|
||||
// Recompiled code.
|
||||
void* code;
|
||||
// Size of the recompiled code.
|
||||
size_t code_size;
|
||||
// Pointers to each individual function within the recompiled code.
|
||||
std::vector<recomp_func_t*> functions;
|
||||
private:
|
||||
// List of jump details and the corresponding jump instruction address. These jumps get populated after recompilation is complete
|
||||
// during dependency resolution.
|
||||
std::vector<std::pair<ReferenceJumpDetails, void*>> reference_symbol_jumps;
|
||||
// Mapping of import symbol index to any jumps to that import symbol.
|
||||
std::unordered_multimap<size_t, void*> import_jumps_by_index;
|
||||
// sljit executable offset.
|
||||
int64_t executable_offset;
|
||||
|
||||
friend class LiveGenerator;
|
||||
};
|
||||
// Runtime function pointers and data baked into live-recompiled code. These
// mirror the runtime functions the C recompiler output links against (see
// recomp.h), supplied as pointers since live-generated code can't go through
// the linker.
struct LiveGeneratorInputs {
    // Offset applied to event indices — presumably added to the index passed
    // to emit_trigger_event; confirm against the trigger_event callback.
    uint32_t base_event_index;
    void (*cop0_status_write)(recomp_context* ctx, gpr value);
    gpr (*cop0_status_read)(recomp_context* ctx);
    void (*switch_error)(const char* func, uint32_t vram, uint32_t jtbl);
    void (*do_break)(uint32_t vram);
    recomp_func_t* (*get_function)(int32_t vram);
    void (*syscall_handler)(uint8_t* rdram, recomp_context* ctx, int32_t instruction_vram);
    void (*pause_self)(uint8_t* rdram);
    void (*trigger_event)(uint8_t* rdram, recomp_context* ctx, uint32_t event_index);
    // Section base address tables for reference and local sections.
    int32_t *reference_section_addresses;
    int32_t *local_section_addresses;
};
|
||||
class LiveGenerator final : public Generator {
|
||||
public:
|
||||
LiveGenerator(size_t num_funcs, const LiveGeneratorInputs& inputs);
|
||||
~LiveGenerator();
|
||||
// Prevent moving or copying.
|
||||
LiveGenerator(const LiveGenerator& rhs) = delete;
|
||||
LiveGenerator(LiveGenerator&& rhs) = delete;
|
||||
LiveGenerator& operator=(const LiveGenerator& rhs) = delete;
|
||||
LiveGenerator& operator=(LiveGenerator&& rhs) = delete;
|
||||
|
||||
LiveGeneratorOutput finish();
|
||||
void process_binary_op(const BinaryOp& op, const InstructionContext& ctx) const final;
|
||||
void process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const final;
|
||||
void process_store_op(const StoreOp& op, const InstructionContext& ctx) const final;
|
||||
void emit_function_start(const std::string& function_name, size_t func_index) const final;
|
||||
void emit_function_end() const final;
|
||||
void emit_function_call_lookup(uint32_t addr) const final;
|
||||
void emit_function_call_by_register(int reg) const final;
|
||||
void emit_function_call_reference_symbol(const Context& context, uint16_t section_index, size_t symbol_index, uint32_t target_section_offset) const final;
|
||||
void emit_function_call(const Context& context, size_t function_index) const final;
|
||||
void emit_named_function_call(const std::string& function_name) const final;
|
||||
void emit_goto(const std::string& target) const final;
|
||||
void emit_label(const std::string& label_name) const final;
|
||||
void emit_jtbl_addend_declaration(const JumpTable& jtbl, int reg) const final;
|
||||
void emit_branch_condition(const ConditionalBranchOp& op, const InstructionContext& ctx) const final;
|
||||
void emit_branch_close() const final;
|
||||
void emit_switch(const Context& recompiler_context, const JumpTable& jtbl, int reg) const final;
|
||||
void emit_case(int case_index, const std::string& target_label) const final;
|
||||
void emit_switch_error(uint32_t instr_vram, uint32_t jtbl_vram) const final;
|
||||
void emit_switch_close() const final;
|
||||
void emit_return() const final;
|
||||
void emit_check_fr(int fpr) const final;
|
||||
void emit_check_nan(int fpr, bool is_double) const final;
|
||||
void emit_cop0_status_read(int reg) const final;
|
||||
void emit_cop0_status_write(int reg) const final;
|
||||
void emit_cop1_cs_read(int reg) const final;
|
||||
void emit_cop1_cs_write(int reg) const final;
|
||||
void emit_muldiv(InstrId instr_id, int reg1, int reg2) const final;
|
||||
void emit_syscall(uint32_t instr_vram) const final;
|
||||
void emit_do_break(uint32_t instr_vram) const final;
|
||||
void emit_pause_self() const final;
|
||||
void emit_trigger_event(uint32_t event_index) const final;
|
||||
void emit_comment(const std::string& comment) const final;
|
||||
private:
|
||||
void get_operand_string(Operand operand, UnaryOpType operation, const InstructionContext& context, std::string& operand_string) const;
|
||||
void get_binary_expr_string(BinaryOpType type, const BinaryOperands& operands, const InstructionContext& ctx, const std::string& output, std::string& expr_string) const;
|
||||
void get_notation(BinaryOpType op_type, std::string& func_string, std::string& infix_string) const;
|
||||
// Loads the relocated address specified by the instruction context into the target register.
|
||||
void load_relocated_address(const InstructionContext& ctx, int reg) const;
|
||||
sljit_compiler* compiler;
|
||||
LiveGeneratorInputs inputs;
|
||||
mutable std::unique_ptr<LiveGeneratorContext> context;
|
||||
mutable bool errored;
|
||||
};
|
||||
|
||||
void live_recompiler_init();
|
||||
bool recompile_function_live(LiveGenerator& generator, const Context& context, size_t function_index, std::ostream& output_file, std::span<std::vector<uint32_t>> static_funcs_out, bool tag_reference_relocs);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -28,13 +28,12 @@ namespace N64Recomp {
|
|||
ToU32,
|
||||
ToS64,
|
||||
ToU64,
|
||||
NegateS32,
|
||||
NegateS64,
|
||||
Lui,
|
||||
Mask5, // Mask to 5 bits
|
||||
Mask6, // Mask to 6 bits
|
||||
ToInt32, // Functionally equivalent to ToS32, only exists for parity with old codegen
|
||||
Negate,
|
||||
NegateFloat,
|
||||
NegateDouble,
|
||||
AbsFloat,
|
||||
AbsDouble,
|
||||
SqrtFloat,
|
||||
|
@ -51,12 +50,20 @@ namespace N64Recomp {
|
|||
ConvertLFromS,
|
||||
TruncateWFromS,
|
||||
TruncateWFromD,
|
||||
TruncateLFromS,
|
||||
TruncateLFromD,
|
||||
RoundWFromS,
|
||||
RoundWFromD,
|
||||
RoundLFromS,
|
||||
RoundLFromD,
|
||||
CeilWFromS,
|
||||
CeilWFromD,
|
||||
CeilLFromS,
|
||||
CeilLFromD,
|
||||
FloorWFromS,
|
||||
FloorWFromD
|
||||
FloorWFromD,
|
||||
FloorLFromS,
|
||||
FloorLFromD
|
||||
};
|
||||
|
||||
enum class BinaryOpType {
|
||||
|
@ -92,6 +99,12 @@ namespace N64Recomp {
|
|||
LessEq,
|
||||
Greater,
|
||||
GreaterEq,
|
||||
EqualFloat,
|
||||
LessFloat,
|
||||
LessEqFloat,
|
||||
EqualDouble,
|
||||
LessDouble,
|
||||
LessEqDouble,
|
||||
// Loads
|
||||
LD,
|
||||
LW,
|
Loading…
Add table
Add a link
Reference in a new issue