mirror of
https://github.com/N64Recomp/N64Recomp.git
synced 2025-05-14 08:12:19 +00:00
Implement float operations for live generator, switch to native rounding mode for cop1 cs, fix 128-bit typedef errors
This commit is contained in:
parent
8b019567bc
commit
9fa9adbe3a
6 changed files with 354 additions and 91 deletions
|
@ -1,6 +1,7 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include "fmt/format.h"
|
#include "fmt/format.h"
|
||||||
#include "fmt/ostream.h"
|
#include "fmt/ostream.h"
|
||||||
|
@ -701,6 +702,54 @@ void N64Recomp::LiveGenerator::process_binary_op(const BinaryOp& op, const Instr
|
||||||
assert(!failed);
|
assert(!failed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the RoundWFromS unary op: float -> 32-bit integer.
// lroundf picks the nearest integer and sends halfway cases away from zero, regardless of the
// current floating point rounding mode.
// NOTE(review): MIPS ROUND.W.S is specified as round-to-nearest-even, so ties (e.g. 2.5) differ
// from hardware here — confirm this is acceptable.
int32_t do_round_w_s(float num) {
    const long nearest = lroundf(num);
    return static_cast<int32_t>(nearest);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the RoundWFromD unary op: double -> 32-bit integer.
// lround picks the nearest integer and sends halfway cases away from zero, regardless of the
// current floating point rounding mode.
// NOTE(review): MIPS ROUND.W.D is specified as round-to-nearest-even — confirm ties-away is acceptable.
int32_t do_round_w_d(double num) {
    const long nearest = lround(num);
    return static_cast<int32_t>(nearest);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the RoundLFromS unary op: float -> 64-bit integer.
// llroundf picks the nearest integer and sends halfway cases away from zero, regardless of the
// current floating point rounding mode.
// NOTE(review): MIPS ROUND.L.S is specified as round-to-nearest-even — confirm ties-away is acceptable.
int64_t do_round_l_s(float num) {
    const long long nearest = llroundf(num);
    return static_cast<int64_t>(nearest);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the RoundLFromD unary op: double -> 64-bit integer.
// llround picks the nearest integer and sends halfway cases away from zero, regardless of the
// current floating point rounding mode.
// NOTE(review): MIPS ROUND.L.D is specified as round-to-nearest-even — confirm ties-away is acceptable.
int64_t do_round_l_d(double num) {
    const long long nearest = llround(num);
    return static_cast<int64_t>(nearest);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the CeilWFromS unary op: rounds a float up to the next
// integral value with ceilf, then converts it to a 32-bit integer.
// No range or NaN checking is performed before the conversion.
int32_t do_ceil_w_s(float num) {
    const float rounded_up = ceilf(num);
    return static_cast<int32_t>(rounded_up);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the CeilWFromD unary op: rounds a double up to the next
// integral value with ceil, then converts it to a 32-bit integer.
// No range or NaN checking is performed before the conversion.
int32_t do_ceil_w_d(double num) {
    const double rounded_up = ceil(num);
    return static_cast<int32_t>(rounded_up);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the CeilLFromS unary op: rounds a float up to the next
// integral value with ceilf, then converts it to a 64-bit integer.
// No range or NaN checking is performed before the conversion.
int64_t do_ceil_l_s(float num) {
    const float rounded_up = ceilf(num);
    return static_cast<int64_t>(rounded_up);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the CeilLFromD unary op: rounds a double up to the next
// integral value with ceil, then converts it to a 64-bit integer.
// No range or NaN checking is performed before the conversion.
int64_t do_ceil_l_d(double num) {
    const double rounded_up = ceil(num);
    return static_cast<int64_t>(rounded_up);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the FloorWFromS unary op: rounds a float down to the
// previous integral value with floorf, then converts it to a 32-bit integer.
// No range or NaN checking is performed before the conversion.
int32_t do_floor_w_s(float num) {
    const float rounded_down = floorf(num);
    return static_cast<int32_t>(rounded_down);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the FloorWFromD unary op: rounds a double down to the
// previous integral value with floor, then converts it to a 32-bit integer.
// No range or NaN checking is performed before the conversion.
int32_t do_floor_w_d(double num) {
    const double rounded_down = floor(num);
    return static_cast<int32_t>(rounded_down);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the FloorLFromS unary op: rounds a float down to the
// previous integral value with floorf, then converts it to a 64-bit integer.
// No range or NaN checking is performed before the conversion.
int64_t do_floor_l_s(float num) {
    const float rounded_down = floorf(num);
    return static_cast<int64_t>(rounded_down);
}
|
||||||
|
|
||||||
|
// Helper called by JIT-emitted code for the FloorLFromD unary op: rounds a double down to the
// previous integral value with floor, then converts it to a 64-bit integer.
// No range or NaN checking is performed before the conversion.
int64_t do_floor_l_d(double num) {
    const double rounded_down = floor(num);
    return static_cast<int64_t>(rounded_down);
}
|
||||||
|
|
||||||
void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const {
|
void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const InstructionContext& ctx) const {
|
||||||
// Skip instructions that output to $zero
|
// Skip instructions that output to $zero
|
||||||
if (outputs_to_zero(op.output, ctx)) {
|
if (outputs_to_zero(op.output, ctx)) {
|
||||||
|
@ -725,10 +774,59 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
sljit_s32 jit_op;
|
sljit_s32 jit_op = SLJIT_BREAKPOINT;
|
||||||
|
|
||||||
bool failed = false;
|
bool failed = false;
|
||||||
bool float_op = false;
|
bool float_op = false;
|
||||||
|
bool func_float_op = false;
|
||||||
|
|
||||||
|
auto emit_s_func = [this, src, srcw, dst, dstw, &func_float_op](float (*func)(float)) {
|
||||||
|
func_float_op = true;
|
||||||
|
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, src, srcw);
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(F32, F32), SLJIT_IMM, sljit_sw(func));
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F32, dst, dstw, SLJIT_RETURN_FREG, 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto emit_d_func = [this, src, srcw, dst, dstw, &func_float_op](double (*func)(double)) {
|
||||||
|
func_float_op = true;
|
||||||
|
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, src, srcw);
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(F64, F64), SLJIT_IMM, sljit_sw(func));
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F64, dst, dstw, SLJIT_RETURN_FREG, 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto emit_l_from_s_func = [this, src, srcw, dst, dstw, &func_float_op](int64_t (*func)(float)) {
|
||||||
|
func_float_op = true;
|
||||||
|
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, src, srcw);
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(P, F32), SLJIT_IMM, sljit_sw(func));
|
||||||
|
sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_RETURN_REG, 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto emit_w_from_s_func = [this, src, srcw, dst, dstw, &func_float_op](int32_t (*func)(float)) {
|
||||||
|
func_float_op = true;
|
||||||
|
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, src, srcw);
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(32, F32), SLJIT_IMM, sljit_sw(func));
|
||||||
|
sljit_emit_op1(compiler, SLJIT_MOV_S32, dst, dstw, SLJIT_RETURN_REG, 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto emit_l_from_d_func = [this, src, srcw, dst, dstw, &func_float_op](int64_t (*func)(double)) {
|
||||||
|
func_float_op = true;
|
||||||
|
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, src, srcw);
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(P, F64), SLJIT_IMM, sljit_sw(func));
|
||||||
|
sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_RETURN_REG, 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto emit_w_from_d_func = [this, src, srcw, dst, dstw, &func_float_op](int32_t (*func)(double)) {
|
||||||
|
func_float_op = true;
|
||||||
|
|
||||||
|
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, src, srcw);
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1(32, F64), SLJIT_IMM, sljit_sw(func));
|
||||||
|
sljit_emit_op1(compiler, SLJIT_MOV_S32, dst, dstw, SLJIT_RETURN_REG, 0);
|
||||||
|
};
|
||||||
|
|
||||||
switch (op.operation) {
|
switch (op.operation) {
|
||||||
case UnaryOpType::Lui:
|
case UnaryOpType::Lui:
|
||||||
|
@ -748,15 +846,134 @@ void N64Recomp::LiveGenerator::process_unary_op(const UnaryOp& op, const Instruc
|
||||||
jit_op = SLJIT_NEG_F64;
|
jit_op = SLJIT_NEG_F64;
|
||||||
float_op = true;
|
float_op = true;
|
||||||
break;
|
break;
|
||||||
|
case UnaryOpType::AbsFloat:
|
||||||
|
jit_op = SLJIT_ABS_F32;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::AbsDouble:
|
||||||
|
jit_op = SLJIT_ABS_F64;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::SqrtFloat:
|
||||||
|
emit_s_func(sqrtf);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::SqrtDouble:
|
||||||
|
emit_d_func(sqrt);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertSFromW:
|
||||||
|
jit_op = SLJIT_CONV_F32_FROM_S32;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertWFromS:
|
||||||
|
emit_w_from_s_func(do_cvt_w_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertDFromW:
|
||||||
|
jit_op = SLJIT_CONV_F64_FROM_S32;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertWFromD:
|
||||||
|
emit_w_from_d_func(do_cvt_w_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertDFromS:
|
||||||
|
jit_op = SLJIT_CONV_F64_FROM_F32;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertSFromD:
|
||||||
|
// SLJIT_CONV_F32_FROM_F64 uses the current rounding mode, just as CVT_S_D does.
|
||||||
|
jit_op = SLJIT_CONV_F32_FROM_F64;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertDFromL:
|
||||||
|
jit_op = SLJIT_CONV_F64_FROM_SW;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertLFromD:
|
||||||
|
emit_l_from_d_func(do_cvt_l_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertSFromL:
|
||||||
|
jit_op = SLJIT_CONV_F32_FROM_SW;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::ConvertLFromS:
|
||||||
|
emit_l_from_s_func(do_cvt_l_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::TruncateWFromS:
|
||||||
|
// SLJIT_CONV_S32_FROM_F32 rounds towards zero, just as TRUNC_W_S does.
|
||||||
|
jit_op = SLJIT_CONV_S32_FROM_F32;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::TruncateWFromD:
|
||||||
|
// SLJIT_CONV_S32_FROM_F64 rounds towards zero, just as TRUNC_W_D does.
|
||||||
|
jit_op = SLJIT_CONV_S32_FROM_F64;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::TruncateLFromS:
|
||||||
|
// SLJIT_CONV_SW_FROM_F32 rounds towards zero, just as TRUNC_L_S does.
|
||||||
|
jit_op = SLJIT_CONV_SW_FROM_F32;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::TruncateLFromD:
|
||||||
|
// SLJIT_CONV_SW_FROM_F64 rounds towards zero, just as TRUNC_L_D does.
|
||||||
|
jit_op = SLJIT_CONV_SW_FROM_F64;
|
||||||
|
float_op = true;
|
||||||
|
break;
|
||||||
|
case UnaryOpType::RoundWFromS:
|
||||||
|
emit_w_from_s_func(do_round_w_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::RoundWFromD:
|
||||||
|
emit_w_from_d_func(do_round_w_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::RoundLFromS:
|
||||||
|
emit_l_from_s_func(do_round_l_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::RoundLFromD:
|
||||||
|
emit_l_from_d_func(do_round_l_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::CeilWFromS:
|
||||||
|
emit_w_from_s_func(do_ceil_w_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::CeilWFromD:
|
||||||
|
emit_w_from_d_func(do_ceil_w_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::CeilLFromS:
|
||||||
|
emit_l_from_s_func(do_ceil_l_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::CeilLFromD:
|
||||||
|
emit_l_from_d_func(do_ceil_l_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::FloorWFromS:
|
||||||
|
emit_w_from_s_func(do_floor_w_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::FloorWFromD:
|
||||||
|
emit_w_from_d_func(do_floor_w_d);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::FloorLFromS:
|
||||||
|
emit_l_from_s_func(do_floor_l_s);
|
||||||
|
break;
|
||||||
|
case UnaryOpType::FloorLFromD:
|
||||||
|
emit_l_from_d_func(do_floor_l_d);
|
||||||
|
break;
|
||||||
case UnaryOpType::None:
|
case UnaryOpType::None:
|
||||||
jit_op = SLJIT_MOV;
|
jit_op = SLJIT_MOV;
|
||||||
break;
|
break;
|
||||||
default:
|
case UnaryOpType::ToS32:
|
||||||
assert(false);
|
case UnaryOpType::ToInt32:
|
||||||
|
jit_op = SLJIT_MOV_S32;
|
||||||
|
break;
|
||||||
|
// Unary ops that can't be used as a standalone operation
|
||||||
|
case UnaryOpType::ToU32:
|
||||||
|
case UnaryOpType::ToS64:
|
||||||
|
case UnaryOpType::ToU64:
|
||||||
|
case UnaryOpType::Mask5:
|
||||||
|
case UnaryOpType::Mask6:
|
||||||
|
assert(false && "Unsupported unary op");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (float_op) {
|
if (func_float_op) {
|
||||||
|
// Already handled by the lambda.
|
||||||
|
}
|
||||||
|
else if (float_op) {
|
||||||
sljit_emit_fop1(compiler, jit_op, dst, dstw, src, srcw);
|
sljit_emit_fop1(compiler, jit_op, dst, dstw, src, srcw);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -821,7 +1038,7 @@ void N64Recomp::LiveGenerator::process_store_op(const StoreOp& op, const Instruc
|
||||||
void N64Recomp::LiveGenerator::emit_function_start(const std::string& function_name, size_t func_index) const {
|
void N64Recomp::LiveGenerator::emit_function_start(const std::string& function_name, size_t func_index) const {
|
||||||
context->function_name = function_name;
|
context->function_name = function_name;
|
||||||
context->func_labels[func_index] = sljit_emit_label(compiler);
|
context->func_labels[func_index] = sljit_emit_label(compiler);
|
||||||
sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4, 5, 0);
|
sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_FLOAT(1), 5 | SLJIT_ENTER_FLOAT(0), 0);
|
||||||
sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
|
sljit_emit_op2(compiler, SLJIT_SUB, Registers::rdram, 0, Registers::rdram, 0, SLJIT_IMM, rdram_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1083,14 +1300,15 @@ void N64Recomp::LiveGenerator::emit_cop0_status_write(int reg) const {
|
||||||
void N64Recomp::LiveGenerator::emit_cop1_cs_read(int reg) const {
|
void N64Recomp::LiveGenerator::emit_cop1_cs_read(int reg) const {
|
||||||
// Skip the read if the target is the zero register.
|
// Skip the read if the target is the zero register.
|
||||||
if (reg != 0) {
|
if (reg != 0) {
|
||||||
// Load ctx into R0.
|
sljit_sw dst;
|
||||||
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, Registers::ctx, 0);
|
sljit_sw dstw;
|
||||||
|
get_gpr_values(reg, dst, dstw);
|
||||||
|
|
||||||
// Call cop1_cs_read.
|
// Call get_cop1_cs.
|
||||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P,32), SLJIT_IMM, sljit_sw(inputs.cop1_cs_read));
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS0(32), SLJIT_IMM, sljit_sw(get_cop1_cs));
|
||||||
|
|
||||||
// Store the result in the output register.
|
// Store the result in the output register.
|
||||||
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(Registers::ctx), get_gpr_context_offset(reg), SLJIT_R0, 0);
|
sljit_emit_op1(compiler, SLJIT_MOV_S32, dst, dstw, SLJIT_RETURN_REG, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1099,12 +1317,11 @@ void N64Recomp::LiveGenerator::emit_cop1_cs_write(int reg) const {
|
||||||
sljit_sw srcw;
|
sljit_sw srcw;
|
||||||
get_gpr_values(reg, src, srcw);
|
get_gpr_values(reg, src, srcw);
|
||||||
|
|
||||||
// Load ctx and the input register value into R0 and R1
|
// Load the input register value into R0.
|
||||||
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, Registers::ctx, 0);
|
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
|
||||||
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, src, srcw);
|
|
||||||
|
|
||||||
// Call cop1_cs_write.
|
// Call set_cop1_cs.
|
||||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P,32), SLJIT_IMM, sljit_sw(inputs.cop1_cs_write));
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS1V(32), SLJIT_IMM, sljit_sw(set_cop1_cs));
|
||||||
}
|
}
|
||||||
|
|
||||||
void N64Recomp::LiveGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) const {
|
void N64Recomp::LiveGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) const {
|
||||||
|
|
|
@ -227,10 +227,14 @@ TestStats run_test(const std::filesystem::path& tests_dir, const std::string& te
|
||||||
|
|
||||||
auto before_execution = std::chrono::system_clock::now();
|
auto before_execution = std::chrono::system_clock::now();
|
||||||
|
|
||||||
|
int old_rounding = fegetround();
|
||||||
|
|
||||||
// Run the generated code.
|
// Run the generated code.
|
||||||
ctx.r29 = 0xFFFFFFFF80000000 + rdram.size() - 0x10; // Set the stack pointer.
|
ctx.r29 = 0xFFFFFFFF80000000 + rdram.size() - 0x10; // Set the stack pointer.
|
||||||
output.functions[start_func_index](rdram.data(), &ctx);
|
output.functions[start_func_index](rdram.data(), &ctx);
|
||||||
|
|
||||||
|
fesetround(old_rounding);
|
||||||
|
|
||||||
auto after_execution = std::chrono::system_clock::now();
|
auto after_execution = std::chrono::system_clock::now();
|
||||||
|
|
||||||
// Check the result of running the code.
|
// Check the result of running the code.
|
||||||
|
|
153
include/recomp.h
153
include/recomp.h
|
@ -4,21 +4,33 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <fenv.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
// Compiler definition to disable inter-procedural optimization, allowing multiple functions to be in a single file without breaking interposition.
|
// Compiler definition to disable inter-procedural optimization, allowing multiple functions to be in a single file without breaking interposition.
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||||
// MSVC's __declspec(noinline) seems to disable inter-procedural optimization entirely, so it's all that's needed.
|
// MSVC's __declspec(noinline) seems to disable inter-procedural optimization entirely, so it's all that's needed.
|
||||||
#define RECOMP_FUNC __declspec(noinline)
|
#define RECOMP_FUNC __declspec(noinline)
|
||||||
|
|
||||||
|
// Use MSVC's fenv_access pragma.
|
||||||
|
#define SET_FENV_ACCESS() _Pragma("fenv_access(on)")
|
||||||
#elif defined(__clang__)
|
#elif defined(__clang__)
|
||||||
// Clang has no dedicated IPO attribute, so we use a combination of other attributes to give the desired behavior.
|
// Clang has no dedicated IPO attribute, so we use a combination of other attributes to give the desired behavior.
|
||||||
// The inline keyword allows multiple definitions during linking, and extern forces clang to emit an externally visible definition.
|
// The inline keyword allows multiple definitions during linking, and extern forces clang to emit an externally visible definition.
|
||||||
// Weak forces Clang to not perform any IPO as the symbol can be interposed, which prevents actual inlining due to the inline keyword.
|
// Weak forces Clang to not perform any IPO as the symbol can be interposed, which prevents actual inlining due to the inline keyword.
|
||||||
// Add noinline on for good measure, which doesn't conflict with the inline keyword as they have different meanings.
|
// Add noinline on for good measure, which doesn't conflict with the inline keyword as they have different meanings.
|
||||||
#define RECOMP_FUNC extern inline __attribute__((weak,noinline))
|
#define RECOMP_FUNC extern inline __attribute__((weak,noinline))
|
||||||
#elif defined(__GNUC__)
|
|
||||||
// Use GCC's attribute for disabling inter-procedural optimizations.
|
// Use the standard STDC FENV_ACCESS pragma.
|
||||||
#define RECOMP_FUNC __attribute__((noipa))
|
#define SET_FENV_ACCESS() _Pragma("STDC FENV_ACCESS ON")
|
||||||
|
#elif defined(__GNUC__) && !defined(__INTEL_COMPILER)
|
||||||
|
// Use GCC's attribute for disabling inter-procedural optimizations. Also enable the rounding-math compiler flag to disable
|
||||||
|
// constant folding so that arithmetic respects the floating point environment. This is needed because gcc doesn't implement
|
||||||
|
// any FENV_ACCESS pragma.
|
||||||
|
#define RECOMP_FUNC __attribute__((noipa, optimize("rounding-math")))
|
||||||
|
|
||||||
|
// There's no FENV_ACCESS pragma in gcc, so this can be empty.
|
||||||
|
#define SET_FENV_ACCESS()
|
||||||
#else
|
#else
|
||||||
#error "No RECOMP_FUNC definition for this compiler"
|
#error "No RECOMP_FUNC definition for this compiler"
|
||||||
#endif
|
#endif
|
||||||
|
@ -26,18 +38,15 @@
|
||||||
// Implementation of 64-bit multiply and divide instructions
|
// Implementation of 64-bit multiply and divide instructions
|
||||||
#if defined(__SIZEOF_INT128__)
|
#if defined(__SIZEOF_INT128__)
|
||||||
|
|
||||||
typedef __int128 int128_t;
|
|
||||||
typedef unsigned __int128 uint128_t;
|
|
||||||
|
|
||||||
static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) {
|
static inline void DMULT(int64_t a, int64_t b, int64_t* lo64, int64_t* hi64) {
|
||||||
int128_t full128 = ((int128_t)a) * ((int128_t)b);
|
__int128 full128 = ((__int128)a) * ((__int128)b);
|
||||||
|
|
||||||
*hi64 = (int64_t)(full128 >> 64);
|
*hi64 = (int64_t)(full128 >> 64);
|
||||||
*lo64 = (int64_t)(full128 >> 0);
|
*lo64 = (int64_t)(full128 >> 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) {
|
static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64) {
|
||||||
uint128_t full128 = ((uint128_t)a) * ((uint128_t)b);
|
unsigned __int128 full128 = ((unsigned __int128)a) * ((unsigned __int128)b);
|
||||||
|
|
||||||
*hi64 = (uint64_t)(full128 >> 64);
|
*hi64 = (uint64_t)(full128 >> 64);
|
||||||
*lo64 = (uint64_t)(full128 >> 0);
|
*lo64 = (uint64_t)(full128 >> 0);
|
||||||
|
@ -62,7 +71,7 @@ static inline void DMULTU(uint64_t a, uint64_t b, uint64_t* lo64, uint64_t* hi64
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void DDIV(int64_t a, int64_t b, int64_t* quot, int64_t* rem) {
|
static inline void DDIV(int64_t a, int64_t b, int64_t* quot, int64_t* rem) {
|
||||||
bool overflow = ((uint64_t)a == 0x8000000000000000ull) && (b == -1ll);
|
int overflow = ((uint64_t)a == 0x8000000000000000ull) && (b == -1ll);
|
||||||
*quot = overflow ? a : (a / b);
|
*quot = overflow ? a : (a / b);
|
||||||
*rem = overflow ? 0 : (a % b);
|
*rem = overflow ? 0 : (a % b);
|
||||||
}
|
}
|
||||||
|
@ -178,6 +187,50 @@ static inline void do_swr(uint8_t* rdram, gpr offset, gpr reg, gpr val) {
|
||||||
MEM_W(0, word_address) = masked_initial_value | shifted_input_value;
|
MEM_W(0, word_address) = masked_initial_value | shifted_input_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline uint32_t get_cop1_cs() {
|
||||||
|
uint32_t rounding_mode = 0;
|
||||||
|
switch (fegetround()) {
|
||||||
|
// round to nearest value
|
||||||
|
case FE_TONEAREST:
|
||||||
|
default:
|
||||||
|
rounding_mode = 0;
|
||||||
|
break;
|
||||||
|
// round to zero (truncate)
|
||||||
|
case FE_TOWARDZERO:
|
||||||
|
rounding_mode = 1;
|
||||||
|
break;
|
||||||
|
// round to positive infinity (ceil)
|
||||||
|
case FE_UPWARD:
|
||||||
|
rounding_mode = 2;
|
||||||
|
break;
|
||||||
|
// round to negative infinity (floor)
|
||||||
|
case FE_DOWNWARD:
|
||||||
|
rounding_mode = 3;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return rounding_mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void set_cop1_cs(uint32_t val) {
|
||||||
|
uint32_t rounding_mode = val & 0x3;
|
||||||
|
int round = FE_TONEAREST;
|
||||||
|
switch (rounding_mode) {
|
||||||
|
case 0: // round to nearest value
|
||||||
|
round = FE_TONEAREST;
|
||||||
|
break;
|
||||||
|
case 1: // round to zero (truncate)
|
||||||
|
round = FE_TOWARDZERO;
|
||||||
|
break;
|
||||||
|
case 2: // round to positive infinity (ceil)
|
||||||
|
round = FE_UPWARD;
|
||||||
|
break;
|
||||||
|
case 3: // round to negative infinity (floor)
|
||||||
|
round = FE_DOWNWARD;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fesetround(round);
|
||||||
|
}
|
||||||
|
|
||||||
#define S32(val) \
|
#define S32(val) \
|
||||||
((int32_t)(val))
|
((int32_t)(val))
|
||||||
|
|
||||||
|
@ -234,77 +287,37 @@ static inline void do_swr(uint8_t* rdram, gpr offset, gpr reg, gpr val) {
|
||||||
|
|
||||||
#define DEFAULT_ROUNDING_MODE 0
|
#define DEFAULT_ROUNDING_MODE 0
|
||||||
|
|
||||||
static inline int32_t do_cvt_w_s(float val, unsigned int rounding_mode) {
|
static inline int32_t do_cvt_w_s(float val) {
|
||||||
switch (rounding_mode) {
|
// Rounding mode aware float to 32-bit int conversion.
|
||||||
case 0: // round to nearest value
|
return (int32_t)lrintf(val);
|
||||||
return (int32_t)lroundf(val);
|
|
||||||
case 1: // round to zero (truncate)
|
|
||||||
return (int32_t)val;
|
|
||||||
case 2: // round to positive infinity (ceil)
|
|
||||||
return (int32_t)ceilf(val);
|
|
||||||
case 3: // round to negative infinity (floor)
|
|
||||||
return (int32_t)floorf(val);
|
|
||||||
}
|
|
||||||
assert(0);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CVT_W_S(val) \
|
#define CVT_W_S(val) \
|
||||||
do_cvt_w_s(val, rounding_mode)
|
do_cvt_w_s(val)
|
||||||
|
|
||||||
static inline int32_t do_cvt_w_d(double val, unsigned int rounding_mode) {
|
static inline int64_t do_cvt_l_s(float val) {
|
||||||
switch (rounding_mode) {
|
// Rounding mode aware float to 64-bit int conversion.
|
||||||
case 0: // round to nearest value
|
return (int64_t)llrintf(val);
|
||||||
return (int32_t)lround(val);
|
|
||||||
case 1: // round to zero (truncate)
|
|
||||||
return (int32_t)val;
|
|
||||||
case 2: // round to positive infinity (ceil)
|
|
||||||
return (int32_t)ceil(val);
|
|
||||||
case 3: // round to negative infinity (floor)
|
|
||||||
return (int32_t)floor(val);
|
|
||||||
}
|
|
||||||
assert(0);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CVT_W_D(val) \
|
|
||||||
do_cvt_w_d(val, rounding_mode)
|
|
||||||
|
|
||||||
static inline int64_t do_cvt_l_s(float val, unsigned int rounding_mode) {
|
|
||||||
switch (rounding_mode) {
|
|
||||||
case 0: // round to nearest value
|
|
||||||
return (int64_t)llroundf(val);
|
|
||||||
case 1: // round to zero (truncate)
|
|
||||||
return (int64_t)val;
|
|
||||||
case 2: // round to positive infinity (ceil)
|
|
||||||
return (int64_t)ceilf(val);
|
|
||||||
case 3: // round to negative infinity (floor)
|
|
||||||
return (int64_t)floorf(val);
|
|
||||||
}
|
|
||||||
assert(0);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CVT_L_S(val) \
|
#define CVT_L_S(val) \
|
||||||
do_cvt_l_s(val, rounding_mode)
|
do_cvt_l_s(val);
|
||||||
|
|
||||||
static inline int64_t do_cvt_l_d(double val, unsigned int rounding_mode) {
|
static inline int32_t do_cvt_w_d(double val) {
|
||||||
switch (rounding_mode) {
|
// Rounding mode aware double to 32-bit int conversion.
|
||||||
case 0: // round to nearest value
|
return (int32_t)lrint(val);
|
||||||
return (int64_t)llround(val);
|
}
|
||||||
case 1: // round to zero (truncate)
|
|
||||||
return (int64_t)val;
|
#define CVT_W_D(val) \
|
||||||
case 2: // round to positive infinity (ceil)
|
do_cvt_w_d(val)
|
||||||
return (int64_t)ceil(val);
|
|
||||||
case 3: // round to negative infinity (floor)
|
static inline int64_t do_cvt_l_d(double val) {
|
||||||
return (int64_t)floor(val);
|
// Rounding mode aware double to 64-bit int conversion.
|
||||||
}
|
return (int64_t)llrint(val);
|
||||||
assert(0);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CVT_L_D(val) \
|
#define CVT_L_D(val) \
|
||||||
do_cvt_l_d(val, rounding_mode)
|
do_cvt_l_d(val)
|
||||||
|
|
||||||
#define NAN_CHECK(val) \
|
#define NAN_CHECK(val) \
|
||||||
assert(val == val)
|
assert(val == val)
|
||||||
|
|
|
@ -48,8 +48,6 @@ namespace N64Recomp {
|
||||||
uint32_t base_event_index;
|
uint32_t base_event_index;
|
||||||
void (*cop0_status_write)(recomp_context* ctx, gpr value);
|
void (*cop0_status_write)(recomp_context* ctx, gpr value);
|
||||||
gpr (*cop0_status_read)(recomp_context* ctx);
|
gpr (*cop0_status_read)(recomp_context* ctx);
|
||||||
void (*cop1_cs_read)(recomp_context* ctx, gpr value);
|
|
||||||
gpr (*cop1_cs_write)(recomp_context* ctx);
|
|
||||||
void (*switch_error)(const char* func, uint32_t vram, uint32_t jtbl);
|
void (*switch_error)(const char* func, uint32_t vram, uint32_t jtbl);
|
||||||
void (*do_break)(uint32_t vram);
|
void (*do_break)(uint32_t vram);
|
||||||
recomp_func_t* (*get_function)(int32_t vram);
|
recomp_func_t* (*get_function)(int32_t vram);
|
||||||
|
|
|
@ -50,12 +50,20 @@ namespace N64Recomp {
|
||||||
ConvertLFromS,
|
ConvertLFromS,
|
||||||
TruncateWFromS,
|
TruncateWFromS,
|
||||||
TruncateWFromD,
|
TruncateWFromD,
|
||||||
|
TruncateLFromS,
|
||||||
|
TruncateLFromD,
|
||||||
RoundWFromS,
|
RoundWFromS,
|
||||||
RoundWFromD,
|
RoundWFromD,
|
||||||
|
RoundLFromS,
|
||||||
|
RoundLFromD,
|
||||||
CeilWFromS,
|
CeilWFromS,
|
||||||
CeilWFromD,
|
CeilWFromD,
|
||||||
|
CeilLFromS,
|
||||||
|
CeilLFromD,
|
||||||
FloorWFromS,
|
FloorWFromS,
|
||||||
FloorWFromD
|
FloorWFromD,
|
||||||
|
FloorLFromS,
|
||||||
|
FloorLFromD
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class BinaryOpType {
|
enum class BinaryOpType {
|
||||||
|
|
|
@ -289,24 +289,48 @@ void N64Recomp::CGenerator::get_operand_string(Operand operand, UnaryOpType oper
|
||||||
case UnaryOpType::TruncateWFromD:
|
case UnaryOpType::TruncateWFromD:
|
||||||
operand_string = "TRUNC_W_D(" + operand_string + ")";
|
operand_string = "TRUNC_W_D(" + operand_string + ")";
|
||||||
break;
|
break;
|
||||||
|
case UnaryOpType::TruncateLFromS:
|
||||||
|
operand_string = "TRUNC_L_S(" + operand_string + ")";
|
||||||
|
break;
|
||||||
|
case UnaryOpType::TruncateLFromD:
|
||||||
|
operand_string = "TRUNC_L_D(" + operand_string + ")";
|
||||||
|
break;
|
||||||
case UnaryOpType::RoundWFromS:
|
case UnaryOpType::RoundWFromS:
|
||||||
operand_string = "lroundf(" + operand_string + ")";
|
operand_string = "lroundf(" + operand_string + ")";
|
||||||
break;
|
break;
|
||||||
case UnaryOpType::RoundWFromD:
|
case UnaryOpType::RoundWFromD:
|
||||||
operand_string = "lround(" + operand_string + ")";
|
operand_string = "lround(" + operand_string + ")";
|
||||||
break;
|
break;
|
||||||
|
case UnaryOpType::RoundLFromS:
|
||||||
|
operand_string = "llroundf(" + operand_string + ")";
|
||||||
|
break;
|
||||||
|
case UnaryOpType::RoundLFromD:
|
||||||
|
operand_string = "llround(" + operand_string + ")";
|
||||||
|
break;
|
||||||
case UnaryOpType::CeilWFromS:
|
case UnaryOpType::CeilWFromS:
|
||||||
operand_string = "S32(ceilf(" + operand_string + "))";
|
operand_string = "S32(ceilf(" + operand_string + "))";
|
||||||
break;
|
break;
|
||||||
case UnaryOpType::CeilWFromD:
|
case UnaryOpType::CeilWFromD:
|
||||||
operand_string = "S32(ceil(" + operand_string + "))";
|
operand_string = "S32(ceil(" + operand_string + "))";
|
||||||
break;
|
break;
|
||||||
|
case UnaryOpType::CeilLFromS:
|
||||||
|
operand_string = "S64(ceilf(" + operand_string + "))";
|
||||||
|
break;
|
||||||
|
case UnaryOpType::CeilLFromD:
|
||||||
|
operand_string = "S64(ceil(" + operand_string + "))";
|
||||||
|
break;
|
||||||
case UnaryOpType::FloorWFromS:
|
case UnaryOpType::FloorWFromS:
|
||||||
operand_string = "S32(floorf(" + operand_string + "))";
|
operand_string = "S32(floorf(" + operand_string + "))";
|
||||||
break;
|
break;
|
||||||
case UnaryOpType::FloorWFromD:
|
case UnaryOpType::FloorWFromD:
|
||||||
operand_string = "S32(floor(" + operand_string + "))";
|
operand_string = "S32(floor(" + operand_string + "))";
|
||||||
break;
|
break;
|
||||||
|
case UnaryOpType::FloorLFromS:
|
||||||
|
operand_string = "S64(floorf(" + operand_string + "))";
|
||||||
|
break;
|
||||||
|
case UnaryOpType::FloorLFromD:
|
||||||
|
operand_string = "S64(floor(" + operand_string + "))";
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,7 +391,6 @@ void N64Recomp::CGenerator::emit_function_start(const std::string& function_name
|
||||||
"RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n"
|
"RECOMP_FUNC void {}(uint8_t* rdram, recomp_context* ctx) {{\n"
|
||||||
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
|
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
|
||||||
" uint64_t hi = 0, lo = 0, result = 0;\n"
|
" uint64_t hi = 0, lo = 0, result = 0;\n"
|
||||||
" unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n"
|
|
||||||
" int c1cs = 0;\n", // cop1 conditional signal
|
" int c1cs = 0;\n", // cop1 conditional signal
|
||||||
function_name);
|
function_name);
|
||||||
}
|
}
|
||||||
|
@ -461,11 +484,11 @@ void N64Recomp::CGenerator::emit_cop0_status_write(int reg) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void N64Recomp::CGenerator::emit_cop1_cs_read(int reg) const {
|
void N64Recomp::CGenerator::emit_cop1_cs_read(int reg) const {
|
||||||
fmt::print(output_file, "{} = rounding_mode;\n", gpr_to_string(reg));
|
fmt::print(output_file, "{} = get_cop1_cs();\n", gpr_to_string(reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
void N64Recomp::CGenerator::emit_cop1_cs_write(int reg) const {
|
void N64Recomp::CGenerator::emit_cop1_cs_write(int reg) const {
|
||||||
fmt::print(output_file, "rounding_mode = ({}) & 0x3;\n", gpr_to_string(reg));
|
fmt::print(output_file, "set_cop1_cs({});\n", gpr_to_string(reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
void N64Recomp::CGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) const {
|
void N64Recomp::CGenerator::emit_muldiv(InstrId instr_id, int reg1, int reg2) const {
|
||||||
|
|
Loading…
Add table
Reference in a new issue