diff --git a/LiveRecomp/live_generator.cpp b/LiveRecomp/live_generator.cpp index ae2f1b9..ad1c336 100644 --- a/LiveRecomp/live_generator.cpp +++ b/LiveRecomp/live_generator.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "fmt/format.h" #include "fmt/ostream.h" @@ -701,6 +702,54 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr assert(!failed); } +int32_t do_round_w_s(float num) { + return lroundf(num); +} + +int32_t do_round_w_d(double num) { + return lround(num); +} + +int64_t do_round_l_s(float num) { + return llroundf(num); +} + +int64_t do_round_l_d(double num) { + return llround(num); +} + +int32_t do_ceil_w_s(float num) { + return (int32_t)ceilf(num); +} + +int32_t do_ceil_w_d(double num) { + return (int32_t)ceil(num); +} + +int64_t do_ceil_l_s(float num) { + return (int64_t)ceilf(num); +} + +int64_t do_ceil_l_d(double num) { + return (int64_t)ceil(num); +} + +int32_t do_floor_w_s(float num) { + return (int32_t)floorf(num); +} + +int32_t do_floor_w_d(double num) { + return (int32_t)floor(num); +} + +int64_t do_floor_l_s(float num) { + return (int64_t)floorf(num); +} + +int64_t do_floor_l_d(double num) { + return (int64_t)floor(num); +} + void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const { // Skip instructions that output to $zero if (outputs_to_zero(op.output, ctx)) { @@ -725,10 +774,59 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc assert(false); } - sljit_s32 jit_op; + sljit_s32 jit_op = SLJIT_BREAKPOINT; bool failed = false; bool float_op = false; + bool func_float_op = false; + + auto emit_s_func = [this, src, srcw, dst, dstw, &func_float_op](float (*func)(float)) { + func_float_op = true; + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, src, srcw); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(F32, F32), SLJIT_IMM, sljit_sw(func)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, dst, dstw, 
SLJIT_RETURN_FREG, 0); + }; + + auto emit_d_func = [this, src, srcw, dst, dstw, &func_float_op](double (*func)(double)) { + func_float_op = true; + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, src, srcw); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(F64, F64), SLJIT_IMM, sljit_sw(func)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, dst, dstw, SLJIT_RETURN_FREG, 0); + }; + + auto emit_l_from_s_func = [this, src, srcw, dst, dstw, &func_float_op](int64_t (*func)(float)) { + func_float_op = true; + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, src, srcw); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(P, F32), SLJIT_IMM, sljit_sw(func)); + sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_RETURN_REG, 0); + }; + + auto emit_w_from_s_func = [this, src, srcw, dst, dstw, &func_float_op](int32_t (*func)(float)) { + func_float_op = true; + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, src, srcw); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(32, F32), SLJIT_IMM, sljit_sw(func)); + sljit_emit_op1(compiler, SLJIT_MOV_S32, dst, dstw, SLJIT_RETURN_REG, 0); + }; + + auto emit_l_from_d_func = [this, src, srcw, dst, dstw, &func_float_op](int64_t (*func)(double)) { + func_float_op = true; + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, src, srcw); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(P, F64), SLJIT_IMM, sljit_sw(func)); + sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_RETURN_REG, 0); + }; + + auto emit_w_from_d_func = [this, src, srcw, dst, dstw, &func_float_op](int32_t (*func)(double)) { + func_float_op = true; + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, src, srcw); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(32, F64), SLJIT_IMM, sljit_sw(func)); + sljit_emit_op1(compiler, SLJIT_MOV_S32, dst, dstw, SLJIT_RETURN_REG, 0); + }; switch (op.operation) { case UnaryOpType::Lui: @@ -748,15 +846,134 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const 
Instruc jit_op = SLJIT_NEG_F64; float_op = true; break; + case UnaryOpType::AbsFloat: + jit_op = SLJIT_ABS_F32; + float_op = true; + break; + case UnaryOpType::AbsDouble: + jit_op = SLJIT_ABS_F64; + float_op = true; + break; + case UnaryOpType::SqrtFloat: + emit_s_func(sqrtf); + break; + case UnaryOpType::SqrtDouble: + emit_d_func(sqrt); + break; + case UnaryOpType::ConvertSFromW: + jit_op = SLJIT_CONV_F32_FROM_S32; + float_op = true; + break; + case UnaryOpType::ConvertWFromS: + emit_w_from_s_func(do_cvt_w_s); + break; + case UnaryOpType::ConvertDFromW: + jit_op = SLJIT_CONV_F64_FROM_S32; + float_op = true; + break; + case UnaryOpType::ConvertWFromD: + emit_w_from_d_func(do_cvt_w_d); + break; + case UnaryOpType::ConvertDFromS: + jit_op = SLJIT_CONV_F64_FROM_F32; + float_op = true; + break; + case UnaryOpType::ConvertSFromD: + // SLJIT_CONV_F32_FROM_F64 uses the current rounding mode, just as CVT_S_D does. + jit_op = SLJIT_CONV_F32_FROM_F64; + float_op = true; + break; + case UnaryOpType::ConvertDFromL: + jit_op = SLJIT_CONV_F64_FROM_SW; + float_op = true; + break; + case UnaryOpType::ConvertLFromD: + emit_l_from_d_func(do_cvt_l_d); + break; + case UnaryOpType::ConvertSFromL: + jit_op = SLJIT_CONV_F32_FROM_SW; + float_op = true; + break; + case UnaryOpType::ConvertLFromS: + emit_l_from_s_func(do_cvt_l_s); + break; + case UnaryOpType::TruncateWFromS: + // SLJIT_CONV_S32_FROM_F32 rounds towards zero, just as TRUNC_W_S does. + jit_op = SLJIT_CONV_S32_FROM_F32; + float_op = true; + break; + case UnaryOpType::TruncateWFromD: + // SLJIT_CONV_S32_FROM_F64 rounds towards zero, just as TRUNC_W_D does. + jit_op = SLJIT_CONV_S32_FROM_F64; + float_op = true; + break; + case UnaryOpType::TruncateLFromS: + // SLJIT_CONV_SW_FROM_F32 rounds towards zero, just as TRUNC_L_S does. + jit_op = SLJIT_CONV_SW_FROM_F32; + float_op = true; + break; + case UnaryOpType::TruncateLFromD: + // SLJIT_CONV_SW_FROM_F64 rounds towards zero, just as TRUNC_L_D does. 
+ jit_op = SLJIT_CONV_SW_FROM_F64; + float_op = true; + break; + case UnaryOpType::RoundWFromS: + emit_w_from_s_func(do_round_w_s); + break; + case UnaryOpType::RoundWFromD: + emit_w_from_d_func(do_round_w_d); + break; + case UnaryOpType::RoundLFromS: + emit_l_from_s_func(do_round_l_s); + break; + case UnaryOpType::RoundLFromD: + emit_l_from_d_func(do_round_l_d); + break; + case UnaryOpType::CeilWFromS: + emit_w_from_s_func(do_ceil_w_s); + break; + case UnaryOpType::CeilWFromD: + emit_w_from_d_func(do_ceil_w_d); + break; + case UnaryOpType::CeilLFromS: + emit_l_from_s_func(do_ceil_l_s); + break; + case UnaryOpType::CeilLFromD: + emit_l_from_d_func(do_ceil_l_d); + break; + case UnaryOpType::FloorWFromS: + emit_w_from_s_func(do_floor_w_s); + break; + case UnaryOpType::FloorWFromD: + emit_w_from_d_func(do_floor_w_d); + break; + case UnaryOpType::FloorLFromS: + emit_l_from_s_func(do_floor_l_s); + break; + case UnaryOpType::FloorLFromD: + emit_l_from_d_func(do_floor_l_d); + break; case UnaryOpType::None: jit_op = SLJIT_MOV; break; - default: - assert(false); + case UnaryOpType::ToS32: + case UnaryOpType::ToInt32: + jit_op = SLJIT_MOV_S32; + break; + // Unary ops that can't be used as a standalone operation + case UnaryOpType::ToU32: + case UnaryOpType::ToS64: + case UnaryOpType::ToU64: + case UnaryOpType::Mask5: + case UnaryOpType::Mask6: + assert(false && "Unsupported unary op"); return; } - if (float_op) { + if (func_float_op) { + // Already handled by the lambda. 
+ } + else if (float_op) { sljit_emit_fop1(compiler, jit_op, dst, dstw, src, srcw); } else { @@ -821,7 +1038,7 @@ void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const Instruc void N64Recomp::LiveGenerator::emit_function_start(const std::string& function_name, size_t func_index) const { context->function_name = function_name; context->func_labels[func_index] = sljit_emit_label(compiler); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4, 5, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_FLOAT(1), 5 | SLJIT_ENTER_FLOAT(0), 0); sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset); } @@ -1083,14 +1300,15 @@ void N64Recomp::LiveGenerator::emit_cop0_status_write(int reg) const { void N64Recomp::LiveGenerator::emit_cop1_cs_read(int reg) const { // Skip the read if the target is the zero register. if (reg != 0) { - // Load ctx into R0. - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, Registers::ctx, 0); + sljit_sw dst; + sljit_sw dstw; + get_gpr_values(reg, dst, dstw); - // Call cop1_cs_read. - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P,32), SLJIT_IMM, sljit_sw(inputs.cop1_cs_read)); + // Call get_cop1_cs. + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS0(32), SLJIT_IMM, sljit_sw(get_cop1_cs)); // Store the result in the output register. - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(reg), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S32, dst, dstw, SLJIT_RETURN_REG, 0); } } @@ -1099,12 +1317,11 @@ void N64Recomp::LiveGenerator::emit_cop1_cs_write(int reg) const { sljit_sw srcw; get_gpr_values(reg, src, srcw); - // Load ctx and the input register value into R0 and R1 - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, Registers::ctx, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, src, srcw); + // Load the input register value into R0. 
+ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw); - // Call cop1_cs_write. - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P,32), SLJIT_IMM, sljit_sw(inputs.cop1_cs_write)); + // Call set_cop1_cs. + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1V(32), SLJIT_IMM, sljit_sw(set_cop1_cs)); } void N64Recomp::LiveGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) const { diff --git a/LiveRecomp/live_recompiler_test.cpp b/LiveRecomp/live_recompiler_test.cpp index ee537f5..9f9a488 100644 --- a/LiveRecomp/live_recompiler_test.cpp +++ b/LiveRecomp/live_recompiler_test.cpp @@ -227,10 +227,14 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te auto before_execution = std::chrono::system_clock::now(); + int old_rounding = fegetround(); + // Run the generated code. ctx.r29 = 0xFFFFFFFF80000000 + rdram.size() - 0x10; // Set the stack pointer. output.functions[start_func_index](rdram.data(), &ctx); + fesetround(old_rounding); + auto after_execution = std::chrono::system_clock::now(); // Check the result of running the code. diff --git a/include/recomp.h b/include/recomp.h index 69079c5..d291eec 100644 --- a/include/recomp.h +++ b/include/recomp.h @@ -4,21 +4,33 @@ #include #include #include +#include #include // Compiler definition to disable inter-procedural optimization, allowing multiple functions to be in a single file without breaking interposition. -#if defined(_MSC_VER) && !defined(__clang__) +#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) // MSVC's __declspec(noinline) seems to disable inter-procedural optimization entirely, so it's all that's needed. #define RECOMP_FUNC __declspec(noinline) + + // Use MSVC's fenv_access pragma. + #define SET_FENV_ACCESS() _Pragma("fenv_access(on)") #elif defined(__clang__) // Clang has no dedicated IPO attribute, so we use a combination of other attributes to give the desired behavior. 
// The inline keyword allows multiple definitions during linking, and extern forces clang to emit an externally visible definition. // Weak forces Clang to not perform any IPO as the symbol can be interposed, which prevents actual inlining due to the inline keyword. // Add noinline on for good measure, which doesn't conflict with the inline keyword as they have different meanings. #define RECOMP_FUNC extern inline __attribute__((weak,noinline)) -#elif defined(__GNUC__) - // Use GCC's attribute for disabling inter-procedural optimizations. - #define RECOMP_FUNC __attribute__((noipa)) + + // Use the standard STDC FENV_ACCESS pragma. + #define SET_FENV_ACCESS() _Pragma("STDC FENV_ACCESS ON") +#elif defined(__GNUC__) && !defined(__INTEL_COMPILER) + // Use GCC's attribute for disabling inter-procedural optimizations. Also enable the rounding-math compiler flag to disable + // constant folding so that arithmetic respects the floating point environment. This is needed because gcc doesn't implement + // any FENV_ACCESS pragma. + #define RECOMP_FUNC __attribute__((noipa, optimize("rounding-math"))) + + // There's no FENV_ACCESS pragma in gcc, so this can be empty. 
+ #define SET_FENV_ACCESS() #else #error "No RECOMP_FUNC definition for this compiler" #endif @@ -26,18 +38,15 @@ // Implementation of 64-bit multiply and divide instructions #if defined(__SIZEOF_INT128__) -typedef __int128 int128_t; -typedef unsigned __int128 uint128_t; - static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) { - int128_t full128 = ((int128_t)a) * ((int128_t)b); + __int128 full128 = ((__int128)a) * ((__int128)b); *hi64 = (int64_t)(full128 >> 64); *lo64 = (int64_t)(full128 >> 0); } static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) { - uint128_t full128 = ((uint128_t)a) * ((uint128_t)b); + unsigned __int128 full128 = ((unsigned __int128)a) * ((unsigned __int128)b); *hi64 = (uint64_t)(full128 >> 64); *lo64 = (uint64_t)(full128 >> 0); @@ -62,7 +71,7 @@ static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64 #endif static inline void DDIV(int64_t a, int64_t b, int64_t* quot, int64_t* rem) { - bool overflow = ((uint64_t)a == 0x8000000000000000ull) && (b == -1ll); + int overflow = ((uint64_t)a == 0x8000000000000000ull) && (b == -1ll); *quot = overflow ? a : (a / b); *rem = overflow ? 
0 : (a % b); } @@ -178,6 +187,50 @@ static inline void do_swr(uint8_t* rdram, gpr offset, gpr reg, gpr val) { MEM_W(0, word_address) = masked_initial_value | shifted_input_value; } +static inline uint32_t get_cop1_cs() { + uint32_t rounding_mode = 0; + switch (fegetround()) { + // round to nearest value + case FE_TONEAREST: + default: + rounding_mode = 0; + break; + // round to zero (truncate) + case FE_TOWARDZERO: + rounding_mode = 1; + break; + // round to positive infinity (ceil) + case FE_UPWARD: + rounding_mode = 2; + break; + // round to negative infinity (floor) + case FE_DOWNWARD: + rounding_mode = 3; + break; + } + return rounding_mode; +} + +static inline void set_cop1_cs(uint32_t val) { + uint32_t rounding_mode = val & 0x3; + int round = FE_TONEAREST; + switch (rounding_mode) { + case 0: // round to nearest value + round = FE_TONEAREST; + break; + case 1: // round to zero (truncate) + round = FE_TOWARDZERO; + break; + case 2: // round to positive infinity (ceil) + round = FE_UPWARD; + break; + case 3: // round to negative infinity (floor) + round = FE_DOWNWARD; + break; + } + fesetround(round); +} + #define S32(val) \ ((int32_t)(val)) @@ -234,77 +287,37 @@ static inline void do_swr(uint8_t* rdram, gpr offset, gpr reg, gpr val) { #define DEFAULT_ROUNDING_MODE 0 -static inline int32_t do_cvt_w_s(float val, unsigned int rounding_mode) { - switch (rounding_mode) { - case 0: // round to nearest value - return (int32_t)lroundf(val); - case 1: // round to zero (truncate) - return (int32_t)val; - case 2: // round to positive infinity (ceil) - return (int32_t)ceilf(val); - case 3: // round to negative infinity (floor) - return (int32_t)floorf(val); - } - assert(0); - return 0; +static inline int32_t do_cvt_w_s(float val) { + // Rounding mode aware float to 32-bit int conversion. 
+ return (int32_t)lrintf(val); } #define CVT_W_S(val) \ - do_cvt_w_s(val, rounding_mode) + do_cvt_w_s(val) -static inline int32_t do_cvt_w_d(double val, unsigned int rounding_mode) { - switch (rounding_mode) { - case 0: // round to nearest value - return (int32_t)lround(val); - case 1: // round to zero (truncate) - return (int32_t)val; - case 2: // round to positive infinity (ceil) - return (int32_t)ceil(val); - case 3: // round to negative infinity (floor) - return (int32_t)floor(val); - } - assert(0); - return 0; -} - -#define CVT_W_D(val) \ - do_cvt_w_d(val, rounding_mode) - -static inline int64_t do_cvt_l_s(float val, unsigned int rounding_mode) { - switch (rounding_mode) { - case 0: // round to nearest value - return (int64_t)llroundf(val); - case 1: // round to zero (truncate) - return (int64_t)val; - case 2: // round to positive infinity (ceil) - return (int64_t)ceilf(val); - case 3: // round to negative infinity (floor) - return (int64_t)floorf(val); - } - assert(0); - return 0; +static inline int64_t do_cvt_l_s(float val) { + // Rounding mode aware float to 64-bit int conversion. + return (int64_t)llrintf(val); } #define CVT_L_S(val) \ - do_cvt_l_s(val, rounding_mode) + do_cvt_l_s(val) -static inline int64_t do_cvt_l_d(double val, unsigned int rounding_mode) { - switch (rounding_mode) { - case 0: // round to nearest value - return (int64_t)llround(val); - case 1: // round to zero (truncate) - return (int64_t)val; - case 2: // round to positive infinity (ceil) - return (int64_t)ceil(val); - case 3: // round to negative infinity (floor) - return (int64_t)floor(val); - } - assert(0); - return 0; +static inline int32_t do_cvt_w_d(double val) { + // Rounding mode aware double to 32-bit int conversion. + return (int32_t)lrint(val); +} + +#define CVT_W_D(val) \ + do_cvt_w_d(val) + +static inline int64_t do_cvt_l_d(double val) { + // Rounding mode aware double to 64-bit int conversion. 
+ return (int64_t)llrint(val); } #define CVT_L_D(val) \ - do_cvt_l_d(val, rounding_mode) + do_cvt_l_d(val) #define NAN_CHECK(val) \ assert(val == val) diff --git a/include/recompiler/live_recompiler.h b/include/recompiler/live_recompiler.h index 3915c03..76fa812 100644 --- a/include/recompiler/live_recompiler.h +++ b/include/recompiler/live_recompiler.h @@ -48,8 +48,6 @@ namespace N64Recomp { uint32_t base_event_index; void (*cop0_status_write)(recomp_context* ctx, gpr value); gpr (*cop0_status_read)(recomp_context* ctx); - void (*cop1_cs_read)(recomp_context* ctx, gpr value); - gpr (*cop1_cs_write)(recomp_context* ctx); void (*switch_error)(const char* func, uint32_t vram, uint32_t jtbl); void (*do_break)(uint32_t vram); recomp_func_t* (*get_function)(int32_t vram); diff --git a/include/recompiler/operations.h b/include/recompiler/operations.h index abca35b..4e7cd67 100644 --- a/include/recompiler/operations.h +++ b/include/recompiler/operations.h @@ -50,12 +50,20 @@ namespace N64Recomp { ConvertLFromS, TruncateWFromS, TruncateWFromD, + TruncateLFromS, + TruncateLFromD, RoundWFromS, RoundWFromD, + RoundLFromS, + RoundLFromD, CeilWFromS, CeilWFromD, + CeilLFromS, + CeilLFromD, FloorWFromS, - FloorWFromD + FloorWFromD, + FloorLFromS, + FloorLFromD }; enum class BinaryOpType { diff --git a/src/cgenerator.cpp b/src/cgenerator.cpp index ee6a819..1ca565d 100644 --- a/src/cgenerator.cpp +++ b/src/cgenerator.cpp @@ -289,24 +289,48 @@ void N64Recomp::CGenerator::get_operand_string(Operand operand, UnaryOpType oper case UnaryOpType::TruncateWFromD: operand_string = "TRUNC_W_D(" + operand_string + ")"; break; + case UnaryOpType::TruncateLFromS: + operand_string = "TRUNC_L_S(" + operand_string + ")"; + break; + case UnaryOpType::TruncateLFromD: + operand_string = "TRUNC_L_D(" + operand_string + ")"; + break; case UnaryOpType::RoundWFromS: operand_string = "lroundf(" + operand_string + ")"; break; case UnaryOpType::RoundWFromD: operand_string = "lround(" + operand_string + 
")"; break; + case UnaryOpType::RoundLFromS: + operand_string = "llroundf(" + operand_string + ")"; + break; + case UnaryOpType::RoundLFromD: + operand_string = "llround(" + operand_string + ")"; + break; case UnaryOpType::CeilWFromS: operand_string = "S32(ceilf(" + operand_string + "))"; break; case UnaryOpType::CeilWFromD: operand_string = "S32(ceil(" + operand_string + "))"; break; + case UnaryOpType::CeilLFromS: + operand_string = "S64(ceilf(" + operand_string + "))"; + break; + case UnaryOpType::CeilLFromD: + operand_string = "S64(ceil(" + operand_string + "))"; + break; case UnaryOpType::FloorWFromS: operand_string = "S32(floorf(" + operand_string + "))"; break; case UnaryOpType::FloorWFromD: operand_string = "S32(floor(" + operand_string + "))"; break; + case UnaryOpType::FloorLFromS: + operand_string = "S64(floorf(" + operand_string + "))"; + break; + case UnaryOpType::FloorLFromD: + operand_string = "S64(floor(" + operand_string + "))"; + break; } } @@ -367,7 +391,6 @@ void N64Recomp::CGenerator::emit_function_start(const std::string& function_name "RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n" // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output " uint64_t hi = 0, lo = 0, result = 0;\n" - " unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n" " int c1cs = 0;\n", // cop1 conditional signal function_name); } @@ -461,11 +484,11 @@ void N64Recomp::CGenerator::emit_cop0_status_write(int reg) const { } void N64Recomp::CGenerator::emit_cop1_cs_read(int reg) const { - fmt::print(output_file, "{} = rounding_mode;\n", gpr_to_string(reg)); + fmt::print(output_file, "{} = get_cop1_cs();\n", gpr_to_string(reg)); } void N64Recomp::CGenerator::emit_cop1_cs_write(int reg) const { - fmt::print(output_file, "rounding_mode = ({}) & 0x3;\n", gpr_to_string(reg)); + fmt::print(output_file, "set_cop1_cs({});\n", gpr_to_string(reg)); } void N64Recomp::CGenerator::emit_muldiv(InstrId 
instr_id, int reg1, int reg2) const {