diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml new file mode 100644 index 0000000..85723cb --- /dev/null +++ b/.github/workflows/validate.yml @@ -0,0 +1,59 @@ +name: validate +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + type: [ Debug, Release ] + os: [ ubuntu-latest, windows-latest, macos-13, macos-14 ] # macOS 13 is intel and macOS 14 is arm + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ runner.os }}-N64Recomp-ccache + - name: Install Windows Dependencies + if: runner.os == 'Windows' + run: | + choco install ninja + Remove-Item -Path "C:\ProgramData\Chocolatey\bin\ccache.exe" -Force -ErrorAction SilentlyContinue + - name: Install Linux Dependencies + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y ninja-build + - name: Install macOS Dependencies + if: runner.os == 'macOS' + run: | + brew install ninja + - name: Configure Developer Command Prompt + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + - name: Build N64Recomp (Unix) + if: runner.os != 'Windows' + run: |- + # enable ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + cmake -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S . -B cmake-build + cmake --build cmake-build --config ${{ matrix.type }} --target N64Recomp -j 8 + - name: Build N64Recomp (Windows) + if: runner.os == 'Windows' + run: |- + # enable ccache + set $env:PATH="$env:USERPROFILE/.cargo/bin;$env:PATH" + + cmake -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S .
-B cmake-build + cmake --build cmake-build --config Debug --target N64Recomp -j 8 diff --git a/.gitignore b/.gitignore index 594dcb7..147d789 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # VSCode file settings .vscode/settings.json +.vscode/c_cpp_properties.json # Input elf and rom files *.elf @@ -50,3 +51,5 @@ test/RT64 # Runtime files imgui.ini rt64.log +.idea +cmake-build* diff --git a/CMakeLists.txt b/CMakeLists.txt index dfaf460..4c06943 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,4 @@ cmake_minimum_required(VERSION 3.20) -project(Zelda64Recompiled) set(CMAKE_C_STANDARD 17) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -85,6 +84,7 @@ target_include_directories(RSPRecomp PRIVATE "${CMAKE_SOURCE_DIR}/lib/rabbitizer/include" "${CMAKE_SOURCE_DIR}/lib/rabbitizer/cplusplus/include" "${CMAKE_SOURCE_DIR}/lib/fmt/include" + "${CMAKE_SOURCE_DIR}/lib/toml11" "${CMAKE_SOURCE_DIR}/include") target_link_libraries(RSPRecomp fmt rabbitizer) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index 9e06719..4cdfd65 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -9,6 +9,7 @@ #include "rabbitizer.hpp" #include "fmt/format.h" #include "fmt/ostream.h" +#include "toml.hpp" using InstrId = rabbitizer::InstrId::UniqueId; using Cop0Reg = rabbitizer::Registers::Rsp::Cop0; @@ -459,10 +460,12 @@ bool process_instruction(size_t instr_index, const std::vector extra_indirect_branch_targets{ 0x1F80, 0x1250, 0x1154, 0x1094, 0x1E0C, 0x1514, 0x1E7C, 0x1C90, 0x1180, 0x1808, 0x11E8, 0x1ADC, 0x1B6C, 0x1194, 0x1EF8, 0x1240, 0x17C0, 0x186C, 0x1A58, 0x18BC, 0x1ABC, 0x1ACC, 0x1A80, 0x1BD4 }; //const std::unordered_set unsupported_instructions{}; -// BT n_aspMain -constexpr size_t rsp_text_offset = 0x1E4F3B0; -constexpr size_t rsp_text_size = 0xF80; -constexpr size_t rsp_text_address = 0x04001080; -std::string rom_file_path = "../../BTRecomp/banjotooie.decompressed.us.z64"; // uncompressed rom! 
-std::string output_file_path = "../../BTRecomp/rsp/n_aspMain.cpp"; -std::string output_function_name = "n_aspMain"; -const std::vector extra_indirect_branch_targets{ - // dispatch table - 0x1AE8, 0x143C, 0x1240, 0x1D84, 0x126C, 0x1B20, 0x12A8, 0x1214, 0x141C, 0x1310, 0x13CC, 0x12E4, 0x1FB0, 0x1358, 0x16EC, 0x1408 -}; -const std::unordered_set unsupported_instructions{ - // cmd_MP3 - 0x00001214 -}; - #ifdef _MSC_VER inline uint32_t byteswap(uint32_t val) { return _byteswap_ulong(val); @@ -591,20 +584,113 @@ constexpr uint32_t byteswap(uint32_t val) { } #endif -static_assert((rsp_text_size / instr_size) * instr_size == rsp_text_size, "RSP microcode must be a multiple of the instruction size"); +struct RSPRecompilerConfig { + size_t text_offset; + size_t text_size; + size_t text_address; + std::filesystem::path rom_file_path; + std::filesystem::path output_file_path; + std::string output_function_name; + std::vector extra_indirect_branch_targets; + std::unordered_set unsupported_instructions; +}; -int main() { - std::array instr_words{}; +std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { + if (!child.empty()) { + return parent / child; + } + return child; +} + +template +std::vector toml_to_vec(const toml::value& branch_targets_data) { + std::vector ret; + + if (branch_targets_data.type() != toml::value_t::array) { + return ret; + } + + // Get the funcs array as an array type. + const std::vector& branch_targets_array = branch_targets_data.as_array(); + + // Reserve room for all the funcs in the map. 
+ ret.reserve(branch_targets_array.size()); + for (const toml::value& cur_target_val : branch_targets_array) { + ret.push_back(cur_target_val.as_integer()); + } + + return ret; +} + +bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) { + std::ifstream config_file {config_path}; + RSPRecompilerConfig ret{}; + + try { + const toml::value config_data = toml::parse(config_path); + std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path(); + + ret.text_offset = toml::find(config_data, "text_offset"); + ret.text_size = toml::find(config_data, "text_size"); + ret.text_address = toml::find(config_data, "text_address"); + + ret.rom_file_path = concat_if_not_empty(basedir, toml::find(config_data, "rom_file_path")); + ret.output_file_path = concat_if_not_empty(basedir, toml::find(config_data, "output_file_path")); + ret.output_function_name = toml::find(config_data, "output_function_name"); + + // Extra indirect branch targets (optional) + const toml::value& branch_targets_data = toml::find_or(config_data, "extra_indirect_branch_targets", toml::value{}); + if (branch_targets_data.type() != toml::value_t::empty) { + ret.extra_indirect_branch_targets = toml_to_vec(branch_targets_data); + } + + // Unsupported instructions (optional): collected into their own set so the branch targets read above are not overwritten + const toml::value& unsupported_instructions_data = toml::find_or(config_data, "unsupported_instructions_data", toml::value{}); + if (unsupported_instructions_data.type() != toml::value_t::empty) { + for (const uint32_t unsupported_vram : toml_to_vec<uint32_t>(unsupported_instructions_data)) { ret.unsupported_instructions.insert(unsupported_vram); } + } + } + catch (const toml::syntax_error& err) { + fmt::print(stderr, "Syntax error in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const toml::type_error& err) { + fmt::print(stderr, "Incorrect type in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const std::out_of_range& err) { +
fmt::print(stderr, "Missing value in config file, full error:\n{}\n", err.what()); + return false; + } + + out = ret; + return true; +} + +int main(int argc, const char** argv) { + if (argc != 2) { + fmt::print("Usage: {} [config file]\n", argv[0]); + std::exit(EXIT_SUCCESS); + } + + RSPRecompilerConfig config; + if (!read_config(std::filesystem::path{argv[1]}, config)) { + fmt::print(stderr, "Failed to parse config file {}\n", argv[1]); + std::exit(EXIT_FAILURE); + } + + std::vector instr_words{}; + instr_words.resize(config.text_size / sizeof(uint32_t)); { - std::ifstream rom_file{ rom_file_path, std::ios_base::binary }; + std::ifstream rom_file{ config.rom_file_path, std::ios_base::binary }; if (!rom_file.good()) { fmt::print(stderr, "Failed to open rom file\n"); return EXIT_FAILURE; } - rom_file.seekg(rsp_text_offset); - rom_file.read(reinterpret_cast(instr_words.data()), rsp_text_size); + rom_file.seekg(config.text_offset); + rom_file.read(reinterpret_cast(instr_words.data()), instr_words.size() * sizeof(uint32_t)); // bounded by the buffer: text_size may not be a multiple of the word size } // Disable appropriate pseudo instructions @@ -616,7 +702,7 @@ int main() { // Decode the instruction words into instructions std::vector instrs{}; instrs.reserve(instr_words.size()); - uint32_t vram = rsp_text_address & rsp_mem_mask; + uint32_t vram = config.text_address & rsp_mem_mask; for (uint32_t instr_word : instr_words) { const rabbitizer::InstructionRsp& instr = instrs.emplace_back(byteswap(instr_word), vram); vram += instr_size; @@ -626,13 +712,13 @@ int main() { BranchTargets branch_targets = get_branch_targets(instrs); // Add any additional indirect branch targets that may not be found directly in the code (e.g.
from a jump table) - for (uint32_t target : extra_indirect_branch_targets) { + for (uint32_t target : config.extra_indirect_branch_targets) { branch_targets.indirect_targets.insert(target); } // Open output file and write beginning - std::filesystem::create_directories(std::filesystem::path{ output_file_path }.parent_path()); - std::ofstream output_file(output_file_path); + std::filesystem::create_directories(std::filesystem::path{ config.output_file_path }.parent_path()); + std::ofstream output_file(config.output_file_path); fmt::print(output_file, "#include \"rsp.h\"\n" "#include \"rsp_vu_impl.h\"\n" @@ -642,18 +728,19 @@ int main() { " uint32_t r16 = 0, r17 = 0, r18 = 0, r19 = 0, r20 = 0, r21 = 0, r22 = 0, r23 = 0;\n" " uint32_t r24 = 0, r25 = 0, r26 = 0, r27 = 0, r28 = 0, r29 = 0, r30 = 0, r31 = 0;\n" " uint32_t dma_dmem_address = 0, dma_dram_address = 0, jump_target = 0;\n" + " const char * debug_file = NULL; int debug_line = 0;\n" " RSP rsp{{}};\n" - " r1 = 0xFC0;\n", output_function_name); + " r1 = 0xFC0;\n", config.output_function_name); // Write each instruction for (size_t instr_index = 0; instr_index < instrs.size(); instr_index++) { - process_instruction(instr_index, instrs, output_file, branch_targets, unsupported_instructions, false, false); + process_instruction(instr_index, instrs, output_file, branch_targets, config.unsupported_instructions, false, false); } // Terminate instruction code with a return to indicate that the microcode has run past its end fmt::print(output_file, " return RspExitReason::ImemOverrun;\n"); // Write the section containing the indirect jump table - write_indirect_jumps(output_file, branch_targets, output_function_name); + write_indirect_jumps(output_file, branch_targets, config.output_function_name); // End the file fmt::print(output_file, "}}\n"); diff --git a/include/recomp_port.h b/include/recomp_port.h index 0140d2f..9c4b11b 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -4,6 +4,7 @@ #include 
#include #include +#include #include #include #include @@ -42,6 +43,8 @@ namespace RecompPort { struct FunctionSize { std::string func_name; uint32_t size_bytes; + + FunctionSize(const std::string& func_name, uint32_t size_bytes) : func_name(func_name), size_bytes(size_bytes) {} }; struct ManualFunction { @@ -49,6 +52,8 @@ namespace RecompPort { std::string section_name; uint32_t vram; uint32_t size; + + ManualFunction(const std::string& func_name, std::string section_name, uint32_t vram, uint32_t size) : func_name(func_name), section_name(std::move(section_name)), vram(vram), size(size) {} }; struct Config { @@ -82,11 +87,16 @@ namespace RecompPort { uint32_t addu_vram; uint32_t jr_vram; std::vector entries; + + JumpTable(uint32_t vram, uint32_t addend_reg, uint32_t rom, uint32_t lw_vram, uint32_t addu_vram, uint32_t jr_vram, std::vector&& entries) + : vram(vram), addend_reg(addend_reg), rom(rom), lw_vram(lw_vram), addu_vram(addu_vram), jr_vram(jr_vram), entries(std::move(entries)) {} }; struct AbsoluteJump { uint32_t jump_target; uint32_t instruction_vram; + + AbsoluteJump(uint32_t jump_target, uint32_t instruction_vram) : jump_target(jump_target), instruction_vram(instruction_vram) {} }; struct Function { @@ -98,6 +108,9 @@ namespace RecompPort { bool ignored; bool reimplemented; bool stubbed; + + Function(uint32_t vram, uint32_t rom, std::vector words, std::string name, ELFIO::Elf_Half section_index, bool ignored = false, bool reimplemented = false, bool stubbed = false) + : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {} }; enum class RelocType : uint8_t { diff --git a/src/main.cpp b/src/main.cpp index 3cdf1dd..6cb8fd2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -141,10 +141,6 @@ std::unordered_set reimplemented_funcs{ "__osInitialize_kmc", "__osInitialize_isv", "__osRdbSend", - // libgcc math routines (these throw off the recompiler) -
"__udivdi3", - "__divdi3", - "__umoddi3", // ido math routines "__ull_div", "__ll_div", @@ -504,11 +500,7 @@ std::unordered_set ignored_funcs { "rmonGetRcpRegister", "kdebugserver", "send", - // libgcc math routines (these throw off the recompiler) - "__muldi3", - "__divdi3", - "__udivdi3", - "__umoddi3", + // ido math routines "__ll_div", "__ll_lshift", @@ -538,15 +530,25 @@ std::unordered_set ignored_funcs { }; std::unordered_set renamed_funcs{ + // Math "sincosf", "sinf", "cosf", "__sinf", "__cosf", + "asinf", + "acosf", + "atanf", + "atan2f", + "tanf", "sqrt", "sqrtf", + + // Memory "memcpy", "memset", + "memmove", + "memcmp", "strcmp", "strcat", "strcpy", @@ -557,8 +559,12 @@ std::unordered_set renamed_funcs{ "bzero", "bcopy", "bcmp", + + // long jumps "setjmp", "longjmp", + + // Math 2 "ldiv", "lldiv", "ceil", @@ -566,6 +572,8 @@ std::unordered_set renamed_funcs{ "floor", "floorf", "fmodf", + "fmod", + "modf", "lround", "lroundf", "nearbyint", @@ -574,11 +582,52 @@ std::unordered_set renamed_funcs{ "roundf", "trunc", "truncf", + + // printf family "vsprintf", + "gcvt", + "fcvt", + "ecvt", + "__assert", + + // allocations "malloc", "free", "realloc", + "calloc", + + // rand + "rand", + "srand", + "random", + + // gzip + "huft_build", + "huft_free", + "inflate_codes", + "inflate_stored", + "inflate_fixed", + "inflate_dynamic", + "inflate_block", + "inflate", + "expand_gzip", + "auRomDataRead", + "data_write", + "unzip", + "updcrc", + "clear_bufs", + "fill_inbuf", + "flush_window", + + // libgcc math routines + "__muldi3", + "__divdi3", + "__udivdi3", + "__umoddi3", + "div64_64", + "div64_32", + "__moddi3", }; bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) { @@ -1186,6 +1235,7 @@ int main(int argc, char** argv) { RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false; RabbitizerConfig_Cfg.pseudos.pseudoBnez = false;
RabbitizerConfig_Cfg.pseudos.pseudoNot = false; + RabbitizerConfig_Cfg.pseudos.pseudoBal = false; std::vector relocatable_sections_ordered{}; diff --git a/src/recompilation.cpp b/src/recompilation.cpp index e525278..4b29725 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -47,6 +47,8 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C uint32_t reloc_section = 0; uint32_t reloc_target_section_offset = 0; + uint32_t func_vram_end = func.vram + func.words.size() * sizeof(func.words[0]); + // Check if this instruction has a reloc. if (section.relocatable && section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) { // Get the reloc data for this instruction @@ -104,27 +106,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } }; - auto print_branch = [&](fmt::format_string fmt_str, Ts ...args) { - fmt::print(output_file, "{{\n "); - if (instr_index < instructions.size() - 1) { - bool dummy_needs_link_branch; - bool dummy_is_branch_likely; - size_t next_reloc_index = reloc_index; - uint32_t next_vram = instr_vram + 4; - if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { - next_reloc_index++; - } - process_instruction(context, config, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out); - } - fmt::print(output_file, " "); - fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...)); - if (needs_link_branch) { - fmt::print(output_file, ";\n goto after_{}", link_branch_index); - } - fmt::print(output_file, ";\n }}\n"); - }; - - auto print_func_call = [&](uint32_t target_func_vram) { + auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true) { const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram); std::string jal_target_name; 
uint32_t section_vram_start = section.ram_addr; @@ -190,11 +172,46 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C return false; } } - needs_link_branch = true; + needs_link_branch = link_branch; print_unconditional_branch("{}(rdram, ctx)", jal_target_name); return true; }; + auto print_branch = [&](uint32_t branch_target) { + if (branch_target < func.vram || branch_target >= func_vram_end) { + // FIXME: how to deal with static functions? + if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { + fmt::print(output_file, "{{\n "); + fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); + print_func_call(branch_target, false); + print_line("return"); + fmt::print(output_file, ";\n }}\n"); + return; + } + + fmt::print(stderr, "[Warn] Function {} is branching outside of the function (to 0x{:08X})\n", func.name, branch_target); + } + + fmt::print(output_file, "{{\n "); + if (instr_index < instructions.size() - 1) { + bool dummy_needs_link_branch; + bool dummy_is_branch_likely; + size_t next_reloc_index = reloc_index; + uint32_t next_vram = instr_vram + 4; + if (reloc_index + 1 < section.relocs.size() && next_vram > section.relocs[reloc_index].address) { + next_reloc_index++; + } + process_instruction(context, config, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, next_reloc_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out); + } + + fmt::print(output_file, " "); + fmt::print(output_file, "goto L_{:08X}", branch_target); + if (needs_link_branch) { + fmt::print(output_file, ";\n goto after_{}", link_branch_index); + } + fmt::print(output_file, ";\n }}\n"); + }; + if (indent) { print_indent(); } @@ -216,8 +233,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C std::string unsigned_imm_string; std::string signed_imm_string; - uint32_t func_vram_end = func.vram + 
func.words.size() * sizeof(func.words[0]); - if (!at_reloc) { unsigned_imm_string = fmt::format("{:#X}", imm); signed_imm_string = fmt::format("{:#X}", (int16_t)imm); @@ -386,16 +401,28 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_mult: print_line("result = S64(S32({}{})) * S64(S32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_dmult: + print_line("DMULT(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_multu: print_line("result = U64(U32({}{})) * U64(U32({}{})); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_dmultu: + print_line("DMULTU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_div: // Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1 print_line("lo = S32(S64(S32({}{})) / S64(S32({}{}))); hi = S32(S64(S32({}{})) % S64(S32({}{})))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_ddiv: + print_line("DDIV(S64({}{}), S64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_divu: print_line("lo = S32(U32({}{}) / U32({}{})); hi = S32(U32({}{}) % U32({}{}))", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_ddivu: + print_line("DDIVU(U64({}{}), U64({}{}), &lo, &hi)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_mflo: print_line("{}{} = lo", ctx_gpr_prefix(rd), rd); break; @@ -480,7 +507,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_swr: print_line("do_swr(rdram, {}, {}{}, {}{})", signed_imm_string, 
ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); break; - + // Branches case InstrId::cpu_jal: print_func_call(instr.getBranchVramGeneric()); @@ -499,16 +526,28 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C { uint32_t branch_target = instr.getBranchVramGeneric(); if (branch_target == instr_vram) { - print_line("void pause_self(uint8_t *rdram); pause_self(rdram)"); + print_line("pause_self(rdram)"); } // Check if the branch is within this function else if (branch_target >= func.vram && branch_target < func_vram_end) { print_unconditional_branch("goto L_{:08X}", branch_target); } - // Otherwise, check if it's a tail call - else if (instr_vram == func_vram_end - 2 * sizeof(func.words[0])) { - fmt::print("Tail call in {}\n", func.name); - print_func_call(branch_target); + // This may be a tail call in the middle of the control flow due to a previous check + // For example: + // ```c + // void test() { + // if (SOME_CONDITION) { + // do_a(); + // } else { + // do_b(); + // } + // } + // ``` + // FIXME: how to deal with static functions? 
+ else if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { + fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); + print_func_call(branch_target, false); + print_line("return"); } else { fmt::print(stderr, "Unhandled branch in {} at 0x{:08X} to 0x{:08X}\n", func.name, instr_vram, branch_target); @@ -524,7 +563,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C [instr_vram](const RecompPort::JumpTable& jtbl) { return jtbl.jr_vram == instr_vram; }); - + if (jtbl_find_result != stats.jump_tables.end()) { const RecompPort::JumpTable& cur_jtbl = *jtbl_find_result; bool dummy_needs_link_branch, dummy_is_branch_likely; @@ -581,7 +620,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bne: print_indent(); print_branch_condition("if ({}{} != {}{})", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_beql: is_branch_likely = true; @@ -589,7 +628,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_beq: print_indent(); print_branch_condition("if ({}{} == {}{})", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bgezl: is_branch_likely = true; @@ -597,7 +636,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bgez: print_indent(); print_branch_condition("if (SIGNED({}{}) >= 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bgtzl: is_branch_likely = true; @@ -605,7 +644,7 @@ bool process_instruction(const 
RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bgtz: print_indent(); print_branch_condition("if (SIGNED({}{}) > 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_blezl: is_branch_likely = true; @@ -613,7 +652,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_blez: print_indent(); print_branch_condition("if (SIGNED({}{}) <= 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bltzl: is_branch_likely = true; @@ -621,11 +660,20 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bltz: print_indent(); print_branch_condition("if (SIGNED({}{}) < 0)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_break: print_line("do_break({})", instr_vram); break; + case InstrId::cpu_bgezall: + is_branch_likely = true; + [[fallthrough]]; + case InstrId::cpu_bgezal: + print_indent(); + print_branch_condition("if (SIGNED({}{}) >= 0) {{", ctx_gpr_prefix(rs), rs); + print_func_call(instr.getBranchVramGeneric()); + print_line("}}"); + break; // Cop1 loads/stores case InstrId::cpu_mtc1: @@ -793,7 +841,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case InstrId::cpu_bc1t: print_indent(); print_branch_condition("if (c1cs)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_bc1fl: is_branch_likely = true; @@ -801,7 +849,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C case 
InstrId::cpu_bc1f: print_indent(); print_branch_condition("if (!c1cs)", ctx_gpr_prefix(rs), rs); - print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); + print_branch((uint32_t)instr.getBranchVramGeneric()); break; // Cop1 arithmetic @@ -929,6 +977,28 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C print_line("NAN_CHECK(ctx->f{}.d)", fs); print_line("ctx->f{}.fl = CVT_S_D(ctx->f{}.d)", fd, fs); break; + case InstrId::cpu_cvt_d_l: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("ctx->f{}.d = CVT_D_L(ctx->f{}.u64)", fd, fs); + break; + case InstrId::cpu_cvt_l_d: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("NAN_CHECK(ctx->f{}.d)", fs); + print_line("ctx->f{}.u64 = CVT_L_D(ctx->f{}.d)", fd, fs); + break; + case InstrId::cpu_cvt_s_l: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("ctx->f{}.fl = CVT_S_L(ctx->f{}.u64)", fd, fs); + break; + case InstrId::cpu_cvt_l_s: + print_line("CHECK_FR(ctx, {})", fd); + print_line("CHECK_FR(ctx, {})", fs); + print_line("NAN_CHECK(ctx->f{}.fl)", fs); + print_line("ctx->f{}.u64 = CVT_L_S(ctx->f{}.fl)", fd, fs); + break; case InstrId::cpu_trunc_w_s: print_line("CHECK_FR(ctx, {})", fd); print_line("CHECK_FR(ctx, {})", fs);