From dbf0e623c8a1cbb3e73237803a8bcc29f32816d6 Mon Sep 17 00:00:00 2001 From: Tharo <17233964+Thar0@users.noreply.github.com> Date: Sat, 11 May 2024 19:08:15 +0100 Subject: [PATCH 1/7] Implement doubleword multiply and divide, and conversions between doubleword and single/double precision floats (#16) --- src/main.cpp | 4 ---- src/recompilation.cpp | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 3cdf1dd..10231ca 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -141,10 +141,6 @@ std::unordered_set reimplemented_funcs{ "__osInitialize_kmc", "__osInitialize_isv", "__osRdbSend", - // libgcc math routines (these throw off the recompiler) - "__udivdi3", - "__divdi3", - "__umoddi3", // ido math routines "__ull_div", "__ll_div", diff --git a/src/recompilation.cpp b/src/recompilation.cpp index e525278..38ad6ff 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -386,16 +386,28 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_mult: print_line("result = S64(S32({}{})) * S64(S32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_dmult: + print_line("DMULT(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_multu: print_line("result = U64(U32({}{})) * U64(U32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_dmultu: + print_line("DMULTU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_div: // Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1 print_line("lo = S32(S64(S32({}{})) / S64(S32({}{}))); hi = S32(S64(S32({}{})) % S64(S32({}{})))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_ddiv: + print_line("DDIV(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_divu: print_line("lo = S32(U32({}{}) / U32({}{})); hi = S32(U32({}{}) % U32({}{}))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_ddivu: + print_line("DDIVU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_mflo: print_line("{}{} = lo", ctx_gpr_prefix(rd), rd); break; @@ -929,6 +941,28 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C print_line("NAN_CHECK(ctx->f{}.d)", fs); print_line("ctx->f{}.fl = CVT_S_D(ctx->f{}.d)", fd, fs); break; + case InstrId::cpu_cvt_d_l: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("ctx->f{}.d = CVT_D_L(ctx->f{}.u64)", fd, fs); + break; + case InstrId::cpu_cvt_l_d: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("NAN_CHECK(ctx->f{}.d)", fs); + print_line("ctx->f{}.u64 = CVT_L_D(ctx->f{}.d)", fd, fs); + break; + case InstrId::cpu_cvt_s_l: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("ctx->f{}.fl = CVT_S_L(ctx->f{}.u64)", fd, fs); + break; + case InstrId::cpu_cvt_l_s: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("NAN_CHECK(ctx->f{}.fl)", fs); + print_line("ctx->f{}.u64 = CVT_L_S(ctx->f{}.fl)", fd, fs); + break; case InstrId::cpu_trunc_w_s: print_line("CHECK_FR(ctx, {})", fd); print_line("CHECK_FR(ctx, {})", fs); From 3ab0edf18a2520b0762546002bc2e2cef69ed5a7 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Sun, 12 May 2024 20:30:02 -0400 Subject: [PATCH 2/7] Changed RSPRecomp to take a toml config file instead of using hardcoded options --- CMakeLists.txt | 1 + RSPRecomp/src/rsp_recomp.cpp | 136 +++++++++++++++++++++++++++-------- 2 files changed, 108 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dfaf460..7eab156 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,6 +85,7 @@ target_include_directories(RSPRecomp PRIVATE "${CMAKE_SOURCE_DIR}/lib/rabbitizer/include" "${CMAKE_SOURCE_DIR}/lib/rabbitizer/cplusplus/include" "${CMAKE_SOURCE_DIR}/lib/fmt/include" + "${CMAKE_SOURCE_DIR}/lib/toml11" "${CMAKE_SOURCE_DIR}/include") target_link_libraries(RSPRecomp fmt rabbitizer) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index 9e06719..343e84a 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -9,6 +9,7 @@ #include "rabbitizer.hpp" #include "fmt/format.h" #include "fmt/ostream.h" +#include "toml.hpp" using InstrId = rabbitizer::InstrId::UniqueId; using Cop0Reg = rabbitizer::Registers::Rsp::Cop0; @@ -565,22 +566,6 @@ void write_indirect_jumps(std::ofstream& output_file, const BranchTargets& branc //const std::vector extra_indirect_branch_targets{ 0x1F80, 0x1250, 0x1154, 0x1094, 0x1E0C, 0x1514, 0x1E7C, 0x1C90, 0x1180, 0x1808, 0x11E8, 0x1ADC, 0x1B6C, 0x1194, 0x1EF8, 0x1240, 0x17C0, 0x186C, 0x1A58, 0x18BC, 0x1ABC, 0x1ACC, 0x1A80, 0x1BD4 }; //const std::unordered_set unsupported_instructions{}; -// BT n_aspMain -constexpr size_t rsp_text_offset = 0x1E4F3B0; -constexpr size_t rsp_text_size = 0xF80; -constexpr size_t rsp_text_address = 0x04001080; -std::string rom_file_path = "../../BTRecomp/banjotooie.decompressed.us.z64"; // uncompressed rom! -std::string output_file_path = "../../BTRecomp/rsp/n_aspMain.cpp"; -std::string output_function_name = "n_aspMain"; -const std::vector extra_indirect_branch_targets{ - // dispatch table - 0x1AE8, 0x143C, 0x1240, 0x1D84, 0x126C, 0x1B20, 0x12A8, 0x1214, 0x141C, 0x1310, 0x13CC, 0x12E4, 0x1FB0, 0x1358, 0x16EC, 0x1408 -}; -const std::unordered_set unsupported_instructions{ - // cmd_MP3 - 0x00001214 -}; - #ifdef _MSC_VER inline uint32_t byteswap(uint32_t val) { return _byteswap_ulong(val); @@ -591,20 +576,113 @@ constexpr uint32_t byteswap(uint32_t val) { } #endif -static_assert((rsp_text_size / instr_size) * instr_size == rsp_text_size, "RSP microcode must be a multiple of the instruction size"); +struct RSPRecompilerConfig { + size_t text_offset; + size_t text_size; + size_t text_address; + std::filesystem::path rom_file_path; + std::filesystem::path output_file_path; + std::string output_function_name; + std::vector extra_indirect_branch_targets; + std::unordered_set unsupported_instructions; +}; -int main() { - std::array instr_words{}; +std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { + if (!child.empty()) { + return parent / child; + } + return child; +} + +template +std::vector toml_to_vec(const toml::value& branch_targets_data) { + std::vector ret; + + if (branch_targets_data.type() != toml::value_t::array) { + return ret; + } + + // Get the funcs array as an array type. + const std::vector& branch_targets_array = branch_targets_data.as_array(); + + // Reserve room for all the funcs in the map. + ret.reserve(branch_targets_array.size()); + for (const toml::value& cur_target_val : branch_targets_array) { + ret.push_back(cur_target_val.as_integer()); + } + + return ret; +} + +bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) { + std::ifstream config_file {config_path}; + RSPRecompilerConfig ret{}; + + try { + const toml::value config_data = toml::parse(config_path); + std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path(); + + ret.text_offset = toml::find(config_data, "text_offset"); + ret.text_size = toml::find(config_data, "text_size"); + ret.text_address = toml::find(config_data, "text_address"); + + ret.rom_file_path = concat_if_not_empty(basedir, toml::find(config_data, "rom_file_path")); + ret.output_file_path = concat_if_not_empty(basedir, toml::find(config_data, "output_file_path")); + ret.output_function_name = toml::find(config_data, "output_function_name"); + + // Extra indirect branch targets (optional) + const toml::value& branch_targets_data = toml::find_or(config_data, "extra_indirect_branch_targets", toml::value{}); + if (branch_targets_data.type() != toml::value_t::empty) { + ret.extra_indirect_branch_targets = toml_to_vec(branch_targets_data); + } + + // Unsupported_instructions (optional) + const toml::value& unsupported_instructions_data = toml::find_or(config_data, "unsupported_instructions_data", toml::value{}); + if (unsupported_instructions_data.type() != toml::value_t::empty) { + ret.extra_indirect_branch_targets = toml_to_vec(unsupported_instructions_data); + } + } + catch (const toml::syntax_error& err) { + fmt::print(stderr, "Syntax error in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const toml::type_error& err) { + fmt::print(stderr, "Incorrect type in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const std::out_of_range& err) { + fmt::print(stderr, "Missing value in config file, full error:\n{}\n", err.what()); + return false; + } + + out = ret; + return true; +} + +int main(int argc, const char** argv) { + if (argc != 2) { + fmt::print("Usage: {} [config file]\n", argv[0]); + std::exit(EXIT_SUCCESS); + } + + RSPRecompilerConfig config; + if (!read_config(std::filesystem::path{argv[1]}, config)) { + fmt::print("Failed to parse config file {}\n", argv[0]); + std::exit(EXIT_FAILURE); + } + + std::vector instr_words{}; + instr_words.resize(config.text_size / sizeof(uint32_t)); { - std::ifstream rom_file{ rom_file_path, std::ios_base::binary }; + std::ifstream rom_file{ config.rom_file_path, std::ios_base::binary }; if (!rom_file.good()) { fmt::print(stderr, "Failed to open rom file\n"); return EXIT_FAILURE; } - rom_file.seekg(rsp_text_offset); - rom_file.read(reinterpret_cast(instr_words.data()), rsp_text_size); + rom_file.seekg(config.text_offset); + rom_file.read(reinterpret_cast(instr_words.data()), config.text_size); } // Disable appropriate pseudo instructions @@ -616,7 +694,7 @@ int main() { // Decode the instruction words into instructions std::vector instrs{}; instrs.reserve(instr_words.size()); - uint32_t vram = rsp_text_address & rsp_mem_mask; + uint32_t vram = config.text_address & rsp_mem_mask; for (uint32_t instr_word : instr_words) { const rabbitizer::InstructionRsp& instr = instrs.emplace_back(byteswap(instr_word), vram); vram += instr_size; @@ -626,13 +704,13 @@ int main() { BranchTargets branch_targets = get_branch_targets(instrs); // Add any additional indirect branch targets that may not be found directly in the code (e.g. from a jump table) - for (uint32_t target : extra_indirect_branch_targets) { + for (uint32_t target : config.extra_indirect_branch_targets) { branch_targets.indirect_targets.insert(target); } // Open output file and write beginning - std::filesystem::create_directories(std::filesystem::path{ output_file_path }.parent_path()); - std::ofstream output_file(output_file_path); + std::filesystem::create_directories(std::filesystem::path{ config.output_file_path }.parent_path()); + std::ofstream output_file(config.output_file_path); fmt::print(output_file, "#include \"rsp.h\"\n" "#include \"rsp_vu_impl.h\"\n" @@ -643,17 +721,17 @@ int main() { " uint32_t r24 = 0, r25 = 0, r26 = 0, r27 = 0, r28 = 0, r29 = 0, r30 = 0, r31 = 0;\n" " uint32_t dma_dmem_address = 0, dma_dram_address = 0, jump_target = 0;\n" " RSP rsp{{}};\n" - " r1 = 0xFC0;\n", output_function_name); + " r1 = 0xFC0;\n", config.output_function_name); // Write each instruction for (size_t instr_index = 0; instr_index < instrs.size(); instr_index++) { - process_instruction(instr_index, instrs, output_file, branch_targets, unsupported_instructions, false, false); + process_instruction(instr_index, instrs, output_file, branch_targets, config.unsupported_instructions, false, false); } // Terminate instruction code with a return to indicate that the microcode has run past its end fmt::print(output_file, " return RspExitReason::ImemOverrun;\n"); // Write the section containing the indirect jump table - write_indirect_jumps(output_file, branch_targets, output_function_name); + write_indirect_jumps(output_file, branch_targets, config.output_function_name); // End the file fmt::print(output_file, "}}\n"); From d7b223fde51c651d2e1aa856efaf19793a77e5e7 Mon Sep 17 00:00:00 2001 From: Anghelo Carvajal Date: Sun, 12 May 2024 22:07:46 -0400 Subject: [PATCH 3/7] Implement `bgezal`/`bgezall` (#23) --- src/main.cpp | 1 + src/recompilation.cpp | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index 10231ca..ed5be02 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1182,6 +1182,7 @@ int main(int argc, char** argv) { RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false; RabbitizerConfig_Cfg.pseudos.pseudoBnez = false; RabbitizerConfig_Cfg.pseudos.pseudoNot = false; + RabbitizerConfig_Cfg.pseudos.pseudoBal = false; std::vector relocatable_sections_ordered{}; diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 38ad6ff..8c5bf4e 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -638,6 +638,15 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_break: print_line("do_break({})", instr_vram); break; + case InstrId::cpu_bgezall: + is_branch_likely = true; + [[fallthrough]]; + case InstrId::cpu_bgezal: + print_indent(); + print_branch_condition("if (SIGNED({}{}) >= 0) {{", ctx_gpr_prefix(rs), rs); + print_func_call(instr.getBranchVramGeneric()); + print_line("}}"); + break; // Cop1 loads/stores case InstrId::cpu_mtc1: From 706e7c50691d52681d7bb05d7563d871d9e429ac Mon Sep 17 00:00:00 2001 From: David Chavez Date: Tue, 14 May 2024 02:55:43 +0200 Subject: [PATCH 4/7] Add Initializers for Structs - Fix issue with Apple Clang (#31) Fixes #30 also adds CI --- .github/workflows/validate.yml | 59 ++++++++++++++++++++++++++++++++++ .gitignore | 2 ++ CMakeLists.txt | 1 - include/recomp_port.h | 13 ++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/validate.yml diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml new file mode 100644 index 0000000..85723cb --- /dev/null +++ b/.github/workflows/validate.yml @@ -0,0 +1,59 @@ +name: validate +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + type: [ Debug, Release ] + os: [ ubuntu-latest, windows-latest, macos-13, macos-14 ] # macOS 13 is intel and macOS 14 is arm + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ runner.os }}-N64Recomp-ccache + - name: Install Windows Dependencies + if: runner.os == 'Windows' + run: | + choco install ninja + Remove-Item -Path "C:\ProgramData\Chocolatey\bin\ccache.exe" -Force -ErrorAction SilentlyContinue + - name: Install Linux Dependencies + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y ninja-build + - name: Install macOS Dependencies + if: runner.os == 'macOS' + run: | + brew install ninja + - name: Configure Developer Command Prompt + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + - name: Build N64Recomp (Unix) + if: runner.os != 'Windows' + run: |- + # enable ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S . -B cmake-build + cmake --build cmake-build --config Debug --target N64Recomp -j 8 + - name: Build N64Recomp (Windows) + if: runner.os == 'Windows' + run: |- + # enable ccache + set $env:PATH="$env:USERPROFILE/.cargo/bin;$env:PATH" + + cmake -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S . -B cmake-build + cmake --build cmake-build --config Debug --target N64Recomp -j 8 diff --git a/.gitignore b/.gitignore index 594dcb7..08f2896 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,5 @@ test/RT64 # Runtime files imgui.ini rt64.log +.idea +cmake-build* diff --git a/CMakeLists.txt b/CMakeLists.txt index 7eab156..4c06943 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,4 @@ cmake_minimum_required(VERSION 3.20) -project(Zelda64Recompiled) set(CMAKE_C_STANDARD 17) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/include/recomp_port.h b/include/recomp_port.h index 0140d2f..9c4b11b 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,8 @@ namespace RecompPort { struct FunctionSize { std::string func_name; uint32_t size_bytes; + + FunctionSize(const std::string& func_name, uint32_t size_bytes) : func_name(func_name), size_bytes(size_bytes) {} }; struct ManualFunction { @@ -49,6 +52,8 @@ namespace RecompPort { std::string section_name; uint32_t vram; uint32_t size; + + ManualFunction(const std::string& func_name, std::string section_name, uint32_t vram, uint32_t size) : func_name(func_name), section_name(std::move(section_name)), vram(vram), size(size) {} }; struct Config { @@ -82,11 +87,16 @@ namespace RecompPort { uint32_t addu_vram; uint32_t jr_vram; std::vector entries; + + JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, std::vector&& entries) + : vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), entries(std::move(entries)) {} }; struct AbsoluteJump { uint32_t jump_target; uint32_t instruction_vram; + + AbsoluteJump(uint32_t jump_target, uint32_t instruction_vram) : jump_target(jump_target), instruction_vram(instruction_vram) {} }; struct Function { @@ -98,6 +108,9 @@ namespace RecompPort { bool ignored; bool reimplemented; bool stubbed; + + Function(uint32_t vram, uint32_t rom, std::vector words, std::string name, ELFIO::Elf_Half section_index, bool ignored = false, bool reimplemented = false, bool stubbed = false) + : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {} }; enum class RelocType : uint8_t { From d4fab15fcc782e665260428974a4d5805ac95e88 Mon Sep 17 00:00:00 2001 From: Tharo <17233964+Thar0@users.noreply.github.com> Date: Tue, 14 May 2024 02:00:00 +0100 Subject: [PATCH 5/7] RSPRecomp: Add some features to help debug unhandled indirect jump targets (file+line of instruction and register dump) (#33) --- RSPRecomp/src/rsp_recomp.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index 343e84a..4cdfd65 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -460,10 +460,12 @@ bool process_instruction(size_t instr_index, const std::vector Date: Tue, 14 May 2024 15:06:47 -0400 Subject: [PATCH 6/7] Add more `renamed_funcs` (#35) --- src/main.cpp | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index ed5be02..81c6b15 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -505,6 +505,9 @@ std::unordered_set ignored_funcs { "__divdi3", "__udivdi3", "__umoddi3", + "div64_64", + "div64_32", + "__moddi3", // ido math routines "__ll_div", "__ll_lshift", @@ -534,15 +537,25 @@ std::unordered_set ignored_funcs { }; std::unordered_set renamed_funcs{ + // Math "sincosf", "sinf", "cosf", "__sinf", "__cosf", + "asinf", + "acosf", + "atanf", + "atan2f", + "tanf", "sqrt", "sqrtf", + + // Memory "memcpy", "memset", + "memmove", + "memcmp", "strcmp", "strcat", "strcpy", @@ -553,8 +566,12 @@ std::unordered_set renamed_funcs{ "bzero", "bcopy", "bcmp", + + // long jumps "setjmp", "longjmp", + + // Math 2 "ldiv", "lldiv", "ceil", @@ -562,6 +579,8 @@ std::unordered_set renamed_funcs{ "floor", "floorf", "fmodf", + "fmod", + "modf", "lround", "lroundf", "nearbyint", @@ -570,11 +589,43 @@ std::unordered_set renamed_funcs{ "roundf", "trunc", "truncf", + + // printf family "vsprintf", + "gcvt", + "fcvt", + "ecvt", + "__assert", + + // allocations "malloc", "free", "realloc", + "calloc", + + // rand + "rand", + "srand", + "random", + + // gzip + "huft_build", + "huft_free", + "inflate_codes", + "inflate_stored", + "inflate_fixed", + "inflate_dynamic", + "inflate_block", + "inflate", + "expand_gzip", + "auRomDataRead" + "data_write", + "unzip", + "updcrc", + "clear_bufs", + "fill_inbuf", + "flush_window", }; bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) { From 26c5c2cbb844b0f6a3f7c0d1440273e499ee2194 Mon Sep 17 00:00:00 2001 From: Anghelo Carvajal Date: Tue, 14 May 2024 18:56:23 -0400 Subject: [PATCH 7/7] Implement tail calls in the middle of functions and allow recomping libgcc math routines (#43) --- .gitignore | 1 + src/main.cpp | 18 ++++---- src/recompilation.cpp | 105 ++++++++++++++++++++++++++---------------- 3 files changed, 77 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index 08f2896..147d789 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # VSCode file settings .vscode/settings.json +.vscode/c_cpp_properties.json # Input elf and rom files *.elf diff --git a/src/main.cpp b/src/main.cpp index 81c6b15..6cb8fd2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -500,14 +500,7 @@ std::unordered_set ignored_funcs { "rmonGetRcpRegister", "kdebugserver", "send", - // libgcc math routines (these throw off the recompiler) - "__muldi3", - "__divdi3", - "__udivdi3", - "__umoddi3", - "div64_64", - "div64_32", - "__moddi3", + // ido math routines "__ll_div", "__ll_lshift", @@ -626,6 +619,15 @@ std::unordered_set renamed_funcs{ "clear_bufs", "fill_inbuf", "flush_window", + + // libgcc math routines + "__muldi3", + "__divdi3", + "__udivdi3", + "__umoddi3", + "div64_64", + "div64_32", + "__moddi3", }; bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) { diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 8c5bf4e..4b29725 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -47,6 +47,8 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C uint32_t reloc_section = 0; uint32_t reloc_target_section_offset = 0; + uint32_t func_vram_end = func.vram + func.words.size() * sizeof(func.words[0]); + // Check if this instruction has a reloc. if (section.relocatable && section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) { // Get the reloc data for this instruction @@ -104,27 +106,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } }; - auto print_branch = [&](fmt::format_string fmt_str, Ts ...args) { - fmt::print(output_file, "{{\n "); - if (instr_index < instructions.size() - 1) { - bool dummy_needs_link_branch; - bool dummy_is_branch_likely; - size_t next_reloc_index = reloc_index; - uint32_t next_vram = instr_vram + 4; - if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { - next_reloc_index++; - } - process_instruction(context, config, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out); - } - fmt::print(output_file, " "); - fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...)); - if (needs_link_branch) { - fmt::print(output_file, ";\n goto after_{}", link_branch_index); - } - fmt::print(output_file, ";\n }}\n"); - }; - - auto print_func_call = [&](uint32_t target_func_vram) { + auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true) { const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram); std::string jal_target_name; uint32_t section_vram_start = section.ram_addr; @@ -190,11 +172,46 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C return false; } } - needs_link_branch = true; + needs_link_branch = link_branch; print_unconditional_branch("{}(rdram, ctx)", jal_target_name); return true; }; + auto print_branch = [&](uint32_t branch_target) { + if (branch_target < func.vram || branch_target >= func_vram_end) { + // FIXME: how to deal with static functions? + if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { + fmt::print(output_file, "{{\n "); + fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); + print_func_call(branch_target, false); + print_line("return"); + fmt::print(output_file, ";\n }}\n"); + return; + } + + fmt::print(stderr, "[Warn] Function {} is branching outside of the function (to 0x{:08X})\n", func.name, branch_target); + } + + fmt::print(output_file, "{{\n "); + if (instr_index < instructions.size() - 1) { + bool dummy_needs_link_branch; + bool dummy_is_branch_likely; + size_t next_reloc_index = reloc_index; + uint32_t next_vram = instr_vram + 4; + if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { + next_reloc_index++; + } + process_instruction(context, config, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out); + } + + fmt::print(output_file, " "); + fmt::print(output_file, "goto L_{:08X}", branch_target); + if (needs_link_branch) { + fmt::print(output_file, ";\n goto after_{}", link_branch_index); + } + fmt::print(output_file, ";\n }}\n"); + }; + if (indent) { print_indent(); } @@ -216,8 +233,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C std::string unsigned_imm_string; std::string signed_imm_string; - uint32_t func_vram_end = func.vram + func.words.size() * sizeof(func.words[0]); - if (!at_reloc) { unsigned_imm_string = fmt::format("{:#X}", imm); signed_imm_string = fmt::format("{:#X}", (int16_t)imm); @@ -492,7 +507,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_swr: print_line("do_swr(rdram, {}, {}{}, {}{})", signed_imm_string, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); break; - + // Branches case InstrId::cpu_jal: print_func_call(instr.getBranchVramGeneric()); @@ -511,16 +526,28 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C { uint32_t branch_target = instr.getBranchVramGeneric(); if (branch_target == instr_vram) { - print_line("void pause_self(uint8_t *rdram); pause_self(rdram)"); + print_line("pause_self(rdram)"); } // Check if the branch is within this function else if (branch_target >= func.vram && branch_target < func_vram_end) { print_unconditional_branch("goto L_{:08X}", branch_target); } - // Otherwise, check if it's a tail call - else if (instr_vram == func_vram_end - 2 * sizeof(func.words[0])) { - fmt::print("Tail call in {}\n", func.name); - print_func_call(branch_target); + // This may be a tail call in the middle of the control flow due to a previous check + // For example: + // ```c + // void test() { + // if (SOME_CONDITION) { + // do_a(); + // } else { + // do_b(); + // } + // } + // ``` + // FIXME: how to deal with static functions? + else if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { + fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); + print_func_call(branch_target, false); + print_line("return"); } else { fmt::print(stderr, "Unhandled branch in {} at 0x{:08X} to 0x{:08X}\n", func.name, instr_vram, branch_target); @@ -536,7 +563,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C [instr_vram](const RecompPort::JumpTable& jtbl) { return jtbl.jr_vram == instr_vram; }); - + if (jtbl_find_result != stats.jump_tables.end()) { const RecompPort::JumpTable& cur_jtbl = *jtbl_find_result; bool dummy_needs_link_branch, dummy_is_branch_likely; @@ -593,7 +620,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bne: print_indent(); print_branch_condition("if ({}{} != {}{})", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_beql: is_branch_likely = true; @@ -601,7 +628,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_beq: print_indent(); print_branch_condition("if ({}{} == {}{})", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bgezl: is_branch_likely = true; @@ -609,7 +636,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bgez: print_indent(); print_branch_condition("if (SIGNED({}{}) >= 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bgtzl: is_branch_likely = true; @@ -617,7 +644,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bgtz: print_indent(); print_branch_condition("if (SIGNED({}{}) > 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_blezl: is_branch_likely = true; @@ -625,7 +652,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_blez: print_indent(); print_branch_condition("if (SIGNED({}{}) <= 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bltzl: is_branch_likely = true; @@ -633,7 +660,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bltz: print_indent(); print_branch_condition("if (SIGNED({}{}) < 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_break: print_line("do_break({})", instr_vram); @@ -814,7 +841,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bc1t: print_indent(); print_branch_condition("if (c1cs)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bc1fl: is_branch_likely = true; @@ -822,7 +849,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bc1f: print_indent(); print_branch_condition("if (!c1cs)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; // Cop1 arithmetic