From e0e52d1fc3399d6153b41db23faea5cf4fc8c976 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy <68165316+Mr-Wiseguy@users.noreply.github.com> Date: Thu, 16 May 2024 22:33:08 -0400 Subject: [PATCH 1/4] Symbol file toml update (#52) * Symbol input file mechanism * Migration to new toml lib --------- Co-authored-by: dcvz --- .gitmodules | 6 +- CMakeLists.txt | 31 +- RSPRecomp/src/rsp_recomp.cpp | 157 ++++----- include/recomp_port.h | 12 +- lib/fmt | 2 +- lib/toml11 | 1 - lib/tomlplusplus | 1 + src/analysis.cpp | 40 +-- src/config.cpp | 629 ++++++++++++++++++++++++----------- src/main.cpp | 236 ++++++++++--- 10 files changed, 748 insertions(+), 367 deletions(-) delete mode 160000 lib/toml11 create mode 160000 lib/tomlplusplus diff --git a/.gitmodules b/.gitmodules index cdaa6ae..2d7b930 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,6 +7,6 @@ [submodule "lib/fmt"] path = lib/fmt url = https://github.com/fmtlib/fmt -[submodule "lib/toml11"] - path = lib/toml11 - url = https://github.com/ToruNiina/toml11 +[submodule "lib/tomlplusplus"] + path = lib/tomlplusplus + url = https://github.com/marzer/tomlplusplus diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c06943..b2e6fca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,9 +5,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) # set(CMAKE_CXX_VISIBILITY_PRESET hidden) -# fmtlib -add_subdirectory(lib/fmt) - # Rabbitizer project(rabbitizer) add_library(rabbitizer STATIC) @@ -51,11 +48,20 @@ target_sources(rabbitizer PRIVATE "${CMAKE_SOURCE_DIR}/lib/rabbitizer/src/instructions/RabbitizerRegister.c" "${CMAKE_SOURCE_DIR}/lib/rabbitizer/src/instructions/RabbitizerRegisterDescriptor.c") -target_include_directories(rabbitizer PRIVATE +target_include_directories(rabbitizer PUBLIC "${CMAKE_SOURCE_DIR}/lib/rabbitizer/include" - "${CMAKE_SOURCE_DIR}/lib/rabbitizer/cplusplus/include" + "${CMAKE_SOURCE_DIR}/lib/rabbitizer/cplusplus/include") + +target_include_directories(rabbitizer PRIVATE 
"${CMAKE_SOURCE_DIR}/lib/rabbitizer/tables") +# fmtlib +add_subdirectory(lib/fmt) + +# tomlplusplus +set(TOML_ENABLE_FORMATTERS OFF) +add_subdirectory(lib/tomlplusplus) + # N64 recompiler project(N64Recomp) add_executable(N64Recomp) @@ -67,27 +73,18 @@ target_sources(N64Recomp PRIVATE ${CMAKE_SOURCE_DIR}/src/recompilation.cpp) target_include_directories(N64Recomp PRIVATE - "${CMAKE_SOURCE_DIR}/lib/rabbitizer/include" - "${CMAKE_SOURCE_DIR}/lib/rabbitizer/cplusplus/include" "${CMAKE_SOURCE_DIR}/lib/ELFIO" - "${CMAKE_SOURCE_DIR}/lib/fmt/include" - "${CMAKE_SOURCE_DIR}/lib/toml11" "${CMAKE_SOURCE_DIR}/include") - target_link_libraries(N64Recomp fmt rabbitizer) +target_link_libraries(N64Recomp fmt rabbitizer tomlplusplus::tomlplusplus) # RSP recompiler project(RSPRecomp) add_executable(RSPRecomp) -target_include_directories(RSPRecomp PRIVATE - "${CMAKE_SOURCE_DIR}/lib/rabbitizer/include" - "${CMAKE_SOURCE_DIR}/lib/rabbitizer/cplusplus/include" - "${CMAKE_SOURCE_DIR}/lib/fmt/include" - "${CMAKE_SOURCE_DIR}/lib/toml11" - "${CMAKE_SOURCE_DIR}/include") +target_include_directories(RSPRecomp PRIVATE "${CMAKE_SOURCE_DIR}/include") -target_link_libraries(RSPRecomp fmt rabbitizer) +target_link_libraries(RSPRecomp fmt rabbitizer tomlplusplus::tomlplusplus) target_sources(RSPRecomp PRIVATE ${CMAKE_SOURCE_DIR}/RSPRecomp/src/rsp_recomp.cpp) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index 4cdfd65..c3c35b8 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -5,11 +5,12 @@ #include #include #include +#include #include #include "rabbitizer.hpp" #include "fmt/format.h" #include "fmt/ostream.h" -#include "toml.hpp" +#include using InstrId = rabbitizer::InstrId::UniqueId; using Cop0Reg = rabbitizer::Registers::Rsp::Cop0; @@ -541,39 +542,6 @@ void write_indirect_jumps(std::ofstream& output_file, const BranchTargets& branc " return RspExitReason::UnhandledJumpTarget;\n", output_function_name); } -// TODO de-hardcode these -// OoT 
njpgdspMain -//constexpr size_t rsp_text_offset = 0xB8BAD0; -//constexpr size_t rsp_text_size = 0xAF0; -//constexpr size_t rsp_text_address = 0x04001080; -//std::string rom_file_path = "../test/oot_mq_debug.z64"; -//std::string output_file_path = "../test/rsp/njpgdspMain.cpp"; -//std::string output_function_name = "njpgdspMain"; -//const std::vector extra_indirect_branch_targets{}; -//const std::unordered_set unsupported_instructions{}; - -// OoT aspMain -//constexpr size_t rsp_text_offset = 0xB89260; -//constexpr size_t rsp_text_size = 0xFB0; -//constexpr size_t rsp_text_address = 0x04001000; -//std::string rom_file_path = "../test/oot_mq_debug.z64"; -//std::string output_file_path = "../test/rsp/aspMain.cpp"; -//std::string output_function_name = "aspMain"; -//const std::vector extra_indirect_branch_targets{ 0x1F68, 0x1230, 0x114C, 0x1F18, 0x1E2C, 0x14F4, 0x1E9C, 0x1CB0, 0x117C, 0x17CC, 0x11E8, 0x1AA4, 0x1B34, 0x1190, 0x1C5C, 0x1220, 0x1784, 0x1830, 0x1A20, 0x1884, 0x1A84, 0x1A94, 0x1A48, 0x1BA0 }; -//const std::unordered_set unsupported_instructions{}; - -// MM's njpgdspMain is identical to OoT's - -//// MM aspMain -//constexpr size_t rsp_text_offset = 0xC40FF0; -//constexpr size_t rsp_text_size = 0x1000; -//constexpr size_t rsp_text_address = 0x04001000; -//std::string rom_file_path = "../../MMRecomp/mm.us.rev1.z64"; // uncompressed rom! 
-//std::string output_file_path = "../../MMRecomp/rsp/aspMain.cpp"; -//std::string output_function_name = "aspMain"; -//const std::vector extra_indirect_branch_targets{ 0x1F80, 0x1250, 0x1154, 0x1094, 0x1E0C, 0x1514, 0x1E7C, 0x1C90, 0x1180, 0x1808, 0x11E8, 0x1ADC, 0x1B6C, 0x1194, 0x1EF8, 0x1240, 0x17C0, 0x186C, 0x1A58, 0x18BC, 0x1ABC, 0x1ACC, 0x1A80, 0x1BD4 }; -//const std::unordered_set unsupported_instructions{}; - #ifdef _MSC_VER inline uint32_t byteswap(uint32_t val) { return _byteswap_ulong(val); @@ -603,65 +571,106 @@ std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, c } template -std::vector toml_to_vec(const toml::value& branch_targets_data) { +std::vector toml_to_vec(const toml::array* array) { std::vector ret; - if (branch_targets_data.type() != toml::value_t::array) { - return ret; - } - - // Get the funcs array as an array type. - const std::vector& branch_targets_array = branch_targets_data.as_array(); - // Reserve room for all the funcs in the map. 
- ret.reserve(branch_targets_array.size()); - for (const toml::value& cur_target_val : branch_targets_array) { - ret.push_back(cur_target_val.as_integer()); - } + ret.reserve(array->size()); + array->for_each([&ret](auto&& el) { + if constexpr (toml::is_integer) { + ret.push_back(*el); + } + }); return ret; } +template +std::unordered_set toml_to_set(const toml::array* array) { + std::unordered_set ret; + + array->for_each([&ret](auto&& el) { + if constexpr (toml::is_integer) { + ret.insert(*el); + } + }); + + return ret; +} + bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) { - std::ifstream config_file {config_path}; RSPRecompilerConfig ret{}; try { - const toml::value config_data = toml::parse(config_path); + const toml::table config_data = toml::parse_file(config_path.u8string()); std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path(); - ret.text_offset = toml::find(config_data, "text_offset"); - ret.text_size = toml::find(config_data, "text_size"); - ret.text_address = toml::find(config_data, "text_address"); + std::optional text_offset = config_data["text_offset"].value(); + if (text_offset.has_value()) { + ret.text_offset = text_offset.value(); + } + else { + throw toml::parse_error("Missing text_offset in config file", config_data.source()); + } - ret.rom_file_path = concat_if_not_empty(basedir, toml::find(config_data, "rom_file_path")); - ret.output_file_path = concat_if_not_empty(basedir, toml::find(config_data, "output_file_path")); - ret.output_function_name = toml::find(config_data, "output_function_name"); + std::optional text_size = config_data["text_size"].value(); + if (text_size.has_value()) { + ret.text_size = text_size.value(); + } + else { + throw toml::parse_error("Missing text_size in config file", config_data.source()); + } + + std::optional text_address = config_data["text_address"].value(); + if (text_address.has_value()) { + ret.text_address = text_address.value(); + } + else 
{ + throw toml::parse_error("Missing text_address in config file", config_data.source()); + } + + std::optional rom_file_path = config_data["rom_file_path"].value(); + if (rom_file_path.has_value()) { + ret.rom_file_path = concat_if_not_empty(basedir, rom_file_path.value()); + } + else { + throw toml::parse_error("Missing rom_file_path in config file", config_data.source()); + } + + std::optional output_file_path = config_data["output_file_path"].value(); + if (output_file_path.has_value()) { + ret.output_file_path = concat_if_not_empty(basedir, output_file_path.value()); + } + else { + throw toml::parse_error("Missing output_file_path in config file", config_data.source()); + } + + std::optional output_function_name = config_data["output_function_name"].value(); + if (output_function_name.has_value()) { + ret.output_function_name = output_function_name.value(); + } + else { + throw toml::parse_error("Missing output_function_name in config file", config_data.source()); + } // Extra indirect branch targets (optional) - const toml::value& branch_targets_data = toml::find_or(config_data, "extra_indirect_branch_targets", toml::value{}); - if (branch_targets_data.type() != toml::value_t::empty) { - ret.extra_indirect_branch_targets = toml_to_vec(branch_targets_data); - } + const toml::node_view branch_targets_data = config_data["extra_indirect_branch_targets"]; + if (branch_targets_data.is_array()) { + const toml::array* branch_targets_array = branch_targets_data.as_array(); + ret.extra_indirect_branch_targets = toml_to_vec(branch_targets_array); + } // Unsupported_instructions (optional) - const toml::value& unsupported_instructions_data = toml::find_or(config_data, "unsupported_instructions_data", toml::value{}); - if (unsupported_instructions_data.type() != toml::value_t::empty) { - ret.extra_indirect_branch_targets = toml_to_vec(unsupported_instructions_data); - } - } - catch (const toml::syntax_error& err) { - fmt::print(stderr, "Syntax error in config file on line 
{}, full error:\n{}\n", err.location().line(), err.what()); - return false; - } - catch (const toml::type_error& err) { - fmt::print(stderr, "Incorrect type in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); - return false; - } - catch (const std::out_of_range& err) { - fmt::print(stderr, "Missing value in config file, full error:\n{}\n", err.what()); - return false; + const toml::node_view unsupported_instructions_data = config_data["unsupported_instructions"]; + if (unsupported_instructions_data.is_array()) { + const toml::array* unsupported_instructions_array = unsupported_instructions_data.as_array(); + ret.unsupported_instructions = toml_to_set(unsupported_instructions_array); + } } + catch (const toml::parse_error& err) { + std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; + return false; + } out = ret; return true; diff --git a/include/recomp_port.h b/include/recomp_port.h index 9c4b11b..a74a8b5 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -44,7 +44,7 @@ namespace RecompPort { std::string func_name; uint32_t size_bytes; - FunctionSize(const std::string& func_name, uint32_t size_bytes) : func_name(func_name), size_bytes(size_bytes) {} + FunctionSize(const std::string& func_name, uint32_t size_bytes) : func_name(std::move(func_name)), size_bytes(size_bytes) {} }; struct ManualFunction { @@ -53,7 +53,7 @@ namespace RecompPort { uint32_t vram; uint32_t size; - ManualFunction(const std::string& func_name, std::string section_name, uint32_t vram, uint32_t size) : func_name(func_name), section_name(std::move(section_name)), vram(vram), size(size) {} + ManualFunction(const std::string& func_name, std::string section_name, uint32_t vram, uint32_t size) : func_name(std::move(func_name)), section_name(std::move(section_name)), vram(vram), size(size) {} }; struct Config { @@ -63,6 +63,8 @@ namespace RecompPort { bool 
single_file_output; bool use_absolute_symbols; std::filesystem::path elf_path; + std::filesystem::path symbols_file_path; + std::filesystem::path rom_file_path; std::filesystem::path output_func_path; std::filesystem::path relocatable_sections_path; std::vector stubbed_funcs; @@ -111,6 +113,7 @@ namespace RecompPort { Function(uint32_t vram, uint32_t rom, std::vector words, std::string name, ELFIO::Elf_Half section_index, bool ignored = false, bool reimplemented = false, bool stubbed = false) : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {} + Function() = default; }; enum class RelocType : uint8_t { @@ -130,7 +133,6 @@ namespace RecompPort { uint32_t symbol_index; uint32_t target_section; RelocType type; - bool needs_relocation; }; struct Section { @@ -175,6 +177,10 @@ namespace RecompPort { rom.reserve(8 * 1024 * 1024); executable_section_count = 0; } + + static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, Context& out); + + Context() = default; }; bool analyze_function(const Context& context, const Function& function, const std::vector& instructions, FunctionStats& stats); diff --git a/lib/fmt b/lib/fmt index d2e89c8..8e72804 160000 --- a/lib/fmt +++ b/lib/fmt @@ -1 +1 @@ -Subproject commit d2e89c8b080394e996d449371267365c223ca76b +Subproject commit 8e728044f673774160f43b44a07c6b185352310f diff --git a/lib/toml11 b/lib/toml11 deleted file mode 160000 index d47fe78..0000000 --- a/lib/toml11 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d47fe788bcb08c9d0d2a73954a0dfaf512964fdc diff --git a/lib/tomlplusplus b/lib/tomlplusplus new file mode 160000 index 0000000..1f7884e --- /dev/null +++ b/lib/tomlplusplus @@ -0,0 +1 @@ +Subproject commit 1f7884e59165e517462f922e7b6de131bd9844f3 diff --git a/src/analysis.cpp b/src/analysis.cpp index 5068d85..f98b737 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ 
-19,31 +19,31 @@ struct RegState { bool valid_addiu; bool valid_addend; // For tracking a register that has been loaded from RAM -uint32_t loaded_lw_vram; -uint32_t loaded_addu_vram; -uint32_t loaded_address; -uint8_t loaded_addend_reg; -bool valid_loaded; + uint32_t loaded_lw_vram; + uint32_t loaded_addu_vram; + uint32_t loaded_address; + uint8_t loaded_addend_reg; + bool valid_loaded; -RegState() = default; + RegState() = default; -void invalidate() { - prev_lui = 0; - prev_addiu_vram = 0; - prev_addu_vram = 0; - prev_addend_reg = 0; + void invalidate() { + prev_lui = 0; + prev_addiu_vram = 0; + prev_addu_vram = 0; + prev_addend_reg = 0; - valid_lui = false; - valid_addiu = false; - valid_addend = false; + valid_lui = false; + valid_addiu = false; + valid_addend = false; - loaded_lw_vram = 0; - loaded_addu_vram = 0; - loaded_address = 0; - loaded_addend_reg = 0; + loaded_lw_vram = 0; + loaded_addu_vram = 0; + loaded_address = 0; + loaded_addend_reg = 0; - valid_loaded = false; -} + valid_loaded = false; + } }; using InstrId = rabbitizer::InstrId::UniqueId; diff --git a/src/config.cpp b/src/config.cpp index dac6991..b22f9bf 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -1,94 +1,80 @@ #include -#include "toml.hpp" +#include #include "fmt/format.h" #include "recomp_port.h" -// Error type for invalid values in the config file. -struct value_error : public toml::exception { - public: - explicit value_error(const std::string& what_arg, const toml::source_location& loc) - : exception(loc), what_(what_arg) { - } - virtual ~value_error() noexcept override = default; - virtual const char* what() const noexcept override { return what_.c_str(); } - - protected: - std::string what_; -}; - -std::vector get_manual_funcs(const toml::value& manual_funcs_data) { +std::vector get_manual_funcs(const toml::array* manual_funcs_array) { std::vector ret; - if (manual_funcs_data.type() != toml::value_t::array) { - return ret; - } - - // Get the funcs array as an array type. 
- const toml::array& manual_funcs_array = manual_funcs_data.as_array(); - // Reserve room for all the funcs in the map. - ret.reserve(manual_funcs_array.size()); - for (const toml::value& cur_func_val : manual_funcs_array) { - const std::string& func_name = toml::find(cur_func_val, "name"); - const std::string& section_name = toml::find(cur_func_val, "section"); - uint32_t vram_in = toml::find(cur_func_val, "vram"); - uint32_t size = toml::find(cur_func_val, "size"); + ret.reserve(manual_funcs_array->size()); + manual_funcs_array->for_each([&ret](auto&& el) { + if constexpr (toml::is_table) { + std::optional func_name = el["name"].template value(); + std::optional section_name = el["section"].template value(); + std::optional vram_in = el["vram"].template value(); + std::optional size = el["size"].template value(); - ret.emplace_back(func_name, section_name, vram_in, size); - } + if (func_name.has_value() && section_name.has_value() && vram_in.has_value() && size.has_value()) { + ret.emplace_back(func_name.value(), section_name.value(), vram_in.value(), size.value()); + } else { + throw toml::parse_error("Missing required value in manual_funcs array", el.source()); + } + } + else { + throw toml::parse_error("Missing required value in manual_funcs array", el.source()); + } + }); return ret; } -std::vector get_stubbed_funcs(const toml::value& patches_data) { +std::vector get_stubbed_funcs(const toml::table* patches_data) { std::vector stubbed_funcs{}; // Check if the stubs array exists. - const auto& stubs_data = toml::find_or(patches_data, "stubs", toml::value{}); + const toml::node_view stubs_data = (*patches_data)["stubs"]; - if (stubs_data.type() == toml::value_t::empty) { - // No stubs, nothing to do here. - return stubbed_funcs; - } + if (stubs_data.is_array()) { + const toml::array* stubs_array = stubs_data.as_array(); - // Get the stubs array as an array type. 
- const toml::array& stubs_array = stubs_data.as_array(); + // Make room for all the stubs in the array. + stubbed_funcs.reserve(stubs_array->size()); - // Make room for all the stubs in the array. - stubbed_funcs.resize(stubs_array.size()); - - // Gather the stubs and place them into the array. - for (size_t stub_idx = 0; stub_idx < stubs_array.size(); stub_idx++) { - // Copy the entry into the stubbed function list. - stubbed_funcs[stub_idx] = stubs_array[stub_idx].as_string(); - } + // Gather the stubs and place them into the array. + stubs_array->for_each([&stubbed_funcs](auto&& el) { + if constexpr (toml::is_string) { + stubbed_funcs.push_back(*el); + } + else { + throw toml::parse_error("Invalid stubbed function", el.source()); + } + }); + } return stubbed_funcs; } -std::vector get_ignored_funcs(const toml::value& patches_data) { +std::vector get_ignored_funcs(const toml::table* patches_data) { std::vector ignored_funcs{}; // Check if the ignored funcs array exists. - const auto& ignored_funcs_data = toml::find_or(patches_data, "ignored", toml::value{}); + const toml::node_view ignored_funcs_data = (*patches_data)["ignored"]; - if (ignored_funcs_data.type() == toml::value_t::empty) { - // No stubs, nothing to do here. - return ignored_funcs; - } + if (ignored_funcs_data.is_array()) { + const toml::array* ignored_funcs_array = ignored_funcs_data.as_array(); - // Get the ignored funcs array as an array type. - const toml::array& ignored_funcs_array = ignored_funcs_data.as_array(); + // Make room for all the ignored funcs in the array. + ignored_funcs.reserve(ignored_funcs_array->size()); - // Make room for all the ignored funcs in the array. - ignored_funcs.resize(ignored_funcs_array.size()); - - // Gather the stubs and place them into the array. - for (size_t stub_idx = 0; stub_idx < ignored_funcs_array.size(); stub_idx++) { - // Copy the entry into the ignored function list. 
- ignored_funcs[stub_idx] = ignored_funcs_array[stub_idx].as_string(); - } + // Gather the stubs and place them into the array. + ignored_funcs_array->for_each([&ignored_funcs](auto&& el) { + if constexpr (toml::is_string) { + ignored_funcs.push_back(*el); + } + }); + } return ignored_funcs; } @@ -98,120 +84,142 @@ std::unordered_map arg_type_map{ {"s32", RecompPort::FunctionArgType::s32}, }; -std::vector parse_args(const toml::array& args_in) { - std::vector ret(args_in.size()); +std::vector parse_args(const toml::array* args_in) { + std::vector ret(args_in->size()); - for (size_t arg_idx = 0; arg_idx < args_in.size(); arg_idx++) { - const toml::value& arg_val = args_in[arg_idx]; - const std::string& arg_str = arg_val.as_string(); + args_in->for_each([&ret](auto&& el) { + if constexpr (toml::is_string) { + const std::string& arg_str = *el; - // Check if the argument type string is valid. - auto type_find = arg_type_map.find(arg_str); - if (type_find == arg_type_map.end()) { - // It's not, so throw an error (and make it look like a normal toml one). - throw toml::type_error(toml::detail::format_underline( - std::string{ std::source_location::current().function_name() } + ": invalid function arg type", { - {arg_val.location(), ""} - }), arg_val.location()); - } - ret[arg_idx] = type_find->second; - } + // Check if the argument type string is valid. + auto type_find = arg_type_map.find(arg_str); + if (type_find == arg_type_map.end()) { + // It's not, so throw an error (and make it look like a normal toml one). 
+ throw toml::parse_error(("Invalid argument type: " + arg_str).c_str(), el.source()); + } + ret.push_back(type_find->second); + } + else { + throw toml::parse_error("Invalid function argument entry", el.source()); + } + }); return ret; } -RecompPort::DeclaredFunctionMap get_declared_funcs(const toml::value& patches_data) { +RecompPort::DeclaredFunctionMap get_declared_funcs(const toml::table* patches_data) { RecompPort::DeclaredFunctionMap declared_funcs{}; // Check if the func array exists. - const toml::value& funcs_data = toml::find_or(patches_data, "func", toml::value{}); - if (funcs_data.type() == toml::value_t::empty) { - // No func array, nothing to do here - return declared_funcs; - } + const toml::node_view funcs_data = (*patches_data)["func"]; - // Get the funcs array as an array type. - const toml::array& funcs_array = funcs_data.as_array(); + if (funcs_data.is_array()) { + const toml::array* funcs_array = funcs_data.as_array(); - // Reserve room for all the funcs in the map. - declared_funcs.reserve(funcs_array.size()); - for (const toml::value& cur_func_val : funcs_array) { - const std::string& func_name = toml::find(cur_func_val, "name"); - const toml::array& args_in = toml::find(cur_func_val, "args"); - - declared_funcs.emplace(func_name, parse_args(args_in)); - } + // Reserve room for all the funcs in the map. + declared_funcs.reserve(funcs_array->size()); + + // Gather the funcs and place them into the map. 
+ funcs_array->for_each([&declared_funcs](auto&& el) { + if constexpr (toml::is_table) { + std::optional func_name = el["name"].template value(); + toml::node_view args_in = el["args"]; + + if (func_name.has_value() && args_in.is_array()) { + const toml::array* args_array = args_in.as_array(); + declared_funcs.emplace(func_name.value(), parse_args(args_array)); + } else { + throw toml::parse_error("Missing required value in func array", el.source()); + } + } + else { + throw toml::parse_error("Invalid declared function entry", el.source()); + } + }); + } return declared_funcs; } -std::vector get_func_sizes(const toml::value& patches_data) { +std::vector get_func_sizes(const toml::table* patches_data) { std::vector func_sizes{}; // Check if the func size array exists. - const toml::value& sizes_data = toml::find_or(patches_data, "function_sizes", toml::value{}); - if (sizes_data.type() == toml::value_t::empty) { - // No func size array, nothing to do here - return func_sizes; - } + const toml::node_view funcs_data = (*patches_data)["function_sizes"]; + if (funcs_data.is_array()) { + const toml::array* sizes_array = funcs_data.as_array(); - // Get the funcs array as an array type. - const toml::array& sizes_array = sizes_data.as_array(); + // Copy all the sizes into the output vector. + sizes_array->for_each([&func_sizes](auto&& el) { + if constexpr (toml::is_table) { + const toml::table& cur_size = *el.as_table(); - // Reserve room for all the funcs in the map. - func_sizes.reserve(sizes_array.size()); - for (const toml::value& cur_func_size : sizes_array) { - const std::string& func_name = toml::find(cur_func_size, "name"); - uint32_t func_size = toml::find(cur_func_size, "size"); + // Get the function name and size. 
+ std::optional func_name = cur_size["name"].value(); + std::optional func_size = cur_size["size"].value(); - // Make sure the size is divisible by 4 - if (func_size & (4 - 1)) { - // It's not, so throw an error (and make it look like a normal toml one). - throw toml::type_error(toml::detail::format_underline( - std::string{ std::source_location::current().function_name() } + ": function size not divisible by 4", { - {cur_func_size.location(), ""} - }), cur_func_size.location()); - } + if (func_name.has_value() && func_size.has_value()) { + // Make sure the size is divisible by 4 + if (func_size.value() & (4 - 1)) { + // It's not, so throw an error (and make it look like a normal toml one). + throw toml::parse_error("Function size is not divisible by 4", el.source()); + } + } + else { + throw toml::parse_error("Manually size function is missing required value(s)", el.source()); + } - func_sizes.emplace_back(func_name, func_size); - } + func_sizes.emplace_back(func_name.value(), func_size.value()); + } + else { + throw toml::parse_error("Invalid manually sized function entry", el.source()); + } + }); + } return func_sizes; } -std::vector get_instruction_patches(const toml::value& patches_data) { +std::vector get_instruction_patches(const toml::table* patches_data) { std::vector ret; // Check if the instruction patch array exists. - const toml::value& insn_patch_data = toml::find_or(patches_data, "instruction", toml::value{}); - if (insn_patch_data.type() == toml::value_t::empty) { - // No instruction patch array, nothing to do here - return ret; - } + const toml::node_view insn_patch_data = (*patches_data)["instruction"]; - // Get the instruction patch array as an array type. 
- const toml::array& insn_patch_array = insn_patch_data.as_array(); - ret.resize(insn_patch_array.size()); + if (insn_patch_data.is_array()) { + const toml::array* insn_patch_array = insn_patch_data.as_array(); + ret.reserve(insn_patch_array->size()); - // Copy all the patches into the output vector. - for (size_t patch_idx = 0; patch_idx < insn_patch_array.size(); patch_idx++) { - const toml::value& cur_patch = insn_patch_array[patch_idx]; + // Copy all the patches into the output vector. + insn_patch_array->for_each([&ret](auto&& el) { + if constexpr (toml::is_table) { + const toml::table& cur_patch = *el.as_table(); - // Get the vram and make sure it's 4-byte aligned. - const toml::value& vram_value = toml::find(cur_patch, "vram"); - int32_t vram = toml::get(vram_value); - if (vram & 0b11) { - // Not properly aligned, so throw an error (and make it look like a normal toml one). - throw value_error(toml::detail::format_underline( - std::string{ std::source_location::current().function_name() } + ": instruction vram is not 4-byte aligned!", { - {vram_value.location(), ""} - }), vram_value.location()); - } + // Get the vram and make sure it's 4-byte aligned. + std::optional vram = cur_patch["vram"].value(); + std::optional func_name = cur_patch["func"].value(); + std::optional value = cur_patch["value"].value(); - ret[patch_idx].func_name = toml::find(cur_patch, "func"); - ret[patch_idx].vram = toml::find(cur_patch, "vram"); - ret[patch_idx].value = toml::find(cur_patch, "value"); - } + if (!vram.has_value() || !func_name.has_value() || !value.has_value()) { + throw toml::parse_error("Instruction patch is missing required value(s)", el.source()); + } + + if (vram.value() & 0b11) { + // Not properly aligned, so throw an error (and make it look like a normal toml one). 
+ throw toml::parse_error("Instruction patch is not word-aligned", el.source()); + } + + ret.push_back(RecompPort::InstructionPatch{ + .func_name = func_name.value(), + .vram = (int32_t)vram.value(), + .value = value.value(), + }); + } + else { + throw toml::parse_error("Invalid instruction patch entry", el.source()); + } + }); + } return ret; } @@ -227,71 +235,296 @@ RecompPort::Config::Config(const char* path) { // Start this config out as bad so that it has to finish parsing without errors to be good. entrypoint = 0; bad = true; + toml::table config_data{}; - try { - const toml::value config_data = toml::parse(path); - std::filesystem::path basedir = std::filesystem::path{ path }.parent_path(); + try { + config_data = toml::parse_file(path); + std::filesystem::path basedir = std::filesystem::path{ path }.parent_path(); - // Input section (required) - const toml::value& input_data = toml::find(config_data, "input"); + // Input section (required) + const auto input_data = config_data["input"]; + const auto entrypoint_data = input_data["entrypoint"]; - if (input_data.contains("entrypoint")) { - entrypoint = toml::find(input_data, "entrypoint"); - has_entrypoint = true; - } - else { - has_entrypoint = false; - } - elf_path = concat_if_not_empty(basedir, toml::find(input_data, "elf_path")); - output_func_path = concat_if_not_empty(basedir, toml::find(input_data, "output_func_path")); - relocatable_sections_path = concat_if_not_empty(basedir, toml::find_or(input_data, "relocatable_sections_path", "")); - uses_mips3_float_mode = toml::find_or(input_data, "uses_mips3_float_mode", false); - bss_section_suffix = toml::find_or(input_data, "bss_section_suffix", ".bss"); - single_file_output = toml::find_or(input_data, "single_file_output", false); - use_absolute_symbols = toml::find_or(input_data, "use_absolute_symbols", false); + if (entrypoint_data) { + const auto entrypoint_value = entrypoint_data.value(); + if (entrypoint_value.has_value()) { + entrypoint = 
(int32_t)entrypoint_value.value(); + has_entrypoint = true; + } + else { + throw toml::parse_error("Invalid entrypoint", entrypoint_data.node()->source()); + } + } + else { + has_entrypoint = false; + } - // Manual functions (optional) - const toml::value& manual_functions_data = toml::find_or(input_data, "manual_funcs", toml::value{}); - if (manual_functions_data.type() != toml::value_t::empty) { - manual_functions = get_manual_funcs(manual_functions_data); - } + std::optional elf_path_opt = input_data["elf_path"].value(); + if (elf_path_opt.has_value()) { + elf_path = concat_if_not_empty(basedir, elf_path_opt.value()); + } - // Patches section (optional) - const toml::value& patches_data = toml::find_or(config_data, "patches", toml::value{}); - if (patches_data.type() != toml::value_t::empty) { - // Stubs array (optional) - stubbed_funcs = get_stubbed_funcs(patches_data); + std::optional symbols_file_path_opt = input_data["symbols_file_path"].value(); + if (symbols_file_path_opt.has_value()) { + symbols_file_path = concat_if_not_empty(basedir, symbols_file_path_opt.value()); + } - // Ignored funcs array (optional) - ignored_funcs = get_ignored_funcs(patches_data); + std::optional rom_file_path_opt = input_data["rom_file_path"].value(); + if (rom_file_path_opt.has_value()) { + rom_file_path = concat_if_not_empty(basedir, rom_file_path_opt.value()); + } - // Functions (optional) - declared_funcs = get_declared_funcs(patches_data); + std::optional output_func_path_opt = input_data["output_func_path"].value(); + if (output_func_path_opt.has_value()) { + output_func_path = concat_if_not_empty(basedir, output_func_path_opt.value()); + } + else { + throw toml::parse_error("Missing output_func_path in config file", input_data.node()->source()); + } - // Single-instruction patches (optional) - instruction_patches = get_instruction_patches(patches_data); + std::optional relocatable_sections_path_opt = input_data["relocatable_sections_path"].value(); + if 
(relocatable_sections_path_opt.has_value()) { + relocatable_sections_path = concat_if_not_empty(basedir, relocatable_sections_path_opt.value()); + } + else { + relocatable_sections_path = ""; + } - // Manual function sizes (optional) - manual_func_sizes = get_func_sizes(patches_data); - } - } - catch (const toml::syntax_error& err) { - fmt::print(stderr, "Syntax error in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); - return; - } - catch (const toml::type_error& err) { - fmt::print(stderr, "Incorrect type in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); - return; - } - catch (const value_error& err) { - fmt::print(stderr, "Invalid value in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); - return; - } - catch (const std::out_of_range& err) { - fmt::print(stderr, "Missing value in config file, full error:\n{}\n", err.what()); - return; - } + std::optional uses_mips3_float_mode_opt = input_data["uses_mips3_float_mode"].value(); + if (uses_mips3_float_mode_opt.has_value()) { + uses_mips3_float_mode = uses_mips3_float_mode_opt.value(); + } + else { + uses_mips3_float_mode = false; + } + + std::optional bss_section_suffix_opt = input_data["bss_section_suffix"].value(); + if (bss_section_suffix_opt.has_value()) { + bss_section_suffix = bss_section_suffix_opt.value(); + } + else { + bss_section_suffix = ".bss"; + } + + std::optional single_file_output_opt = input_data["single_file_output"].value(); + if (single_file_output_opt.has_value()) { + single_file_output = single_file_output_opt.value(); + } + else { + single_file_output = false; + } + + std::optional use_absolute_symbols_opt = input_data["use_absolute_symbols"].value(); + if (use_absolute_symbols_opt.has_value()) { + use_absolute_symbols = use_absolute_symbols_opt.value(); + } + else { + use_absolute_symbols = false; + } + + // Manual functions (optional) + toml::node_view manual_functions_data = 
input_data["manual_funcs"]; + if (manual_functions_data.is_array()) { + const toml::array* array = manual_functions_data.as_array(); + get_manual_funcs(array); + } + + // Patches section (optional) + toml::node_view patches_data = config_data["patches"]; + if (patches_data.is_table()) { + const toml::table* table = patches_data.as_table(); + + // Stubs array (optional) + stubbed_funcs = get_stubbed_funcs(table); + + // Ignored funcs array (optional) + ignored_funcs = get_ignored_funcs(table); + + // Functions (optional) + declared_funcs = get_declared_funcs(table); + + // Single-instruction patches (optional) + instruction_patches = get_instruction_patches(table); + + // Manual function sizes (optional) + manual_func_sizes = get_func_sizes(table); + } + } + catch (const toml::parse_error& err) { + std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; + return; + } // No errors occured, so mark this config file as good. 
bad = false; } + +const std::unordered_map reloc_type_name_map { + { "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE }, + { "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 }, + { "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 }, + { "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 }, + { "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 }, + { "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 }, + { "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 }, + { "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 }, +}; + +RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) { + auto find_it = reloc_type_name_map.find(reloc_type_name); + if (find_it != reloc_type_name_map.end()) { + return find_it->second; + } + return RecompPort::RelocType::R_MIPS_NONE; +} + +bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, RecompPort::Context& out) { + RecompPort::Context ret{}; + + try { + const toml::table config_data = toml::parse_file(symbol_file_path.u8string()); + const toml::node_view config_sections_value = config_data["section"]; + + if (!config_sections_value.is_array()) { + return false; + } + + const toml::array* config_sections = config_sections_value.as_array(); + ret.section_functions.resize(config_sections->size()); + + config_sections->for_each([&ret, &rom](auto&& el) { + if constexpr (toml::is_table) { + std::optional rom_addr = el["rom"].template value(); + std::optional vram_addr = el["vram"].template value(); + std::optional size = el["size"].template value(); + std::optional name = el["name"].template value(); + + if (!rom_addr.has_value() || !vram_addr.has_value() || !size.has_value() || !name.has_value()) { + throw toml::parse_error("Section entry missing required field(s)", el.source()); + } + + size_t section_index = ret.sections.size(); + + Section& section = ret.sections.emplace_back(Section{}); + section.rom_addr = rom_addr.value(); + section.ram_addr = vram_addr.value(); 
+ section.size = size.value(); + section.name = name.value(); + section.executable = true; + + // Read functions for the section. + const toml::node_view cur_functions_value = el["functions"]; + if (!cur_functions_value.is_array()) { + throw toml::parse_error("Invalid functions array", cur_functions_value.node()->source()); + } + + const toml::array* cur_functions = cur_functions_value.as_array(); + cur_functions->for_each([&ret, &rom, &section, section_index](auto&& func_el) { + size_t function_index = ret.functions.size(); + + if constexpr (toml::is_table) { + std::optional name = func_el["name"].template value(); + std::optional vram_addr = func_el["vram"].template value(); + std::optional func_size_ = func_el["size"].template value(); + + if (!name.has_value() || !vram_addr.has_value() || !func_size_.has_value()) { + throw toml::parse_error("Function symbol entry is missing required field(s)", func_el.source()); + } + + uint32_t func_size = func_size_.value(); + + Function cur_func{}; + cur_func.name = name.value(); + cur_func.vram = vram_addr.value(); + cur_func.rom = cur_func.vram - section.ram_addr + section.rom_addr; + cur_func.section_index = section_index; + + if (cur_func.vram & 0b11) { + // Function isn't word aligned in vram. + throw toml::parse_error("Function's vram address isn't word aligned", func_el.source()); + } + + if (cur_func.rom & 0b11) { + // Function isn't word aligned in rom. + throw toml::parse_error("Function's rom address isn't word aligned", func_el.source()); + } + + if (cur_func.rom + func_size > rom.size()) { + // Function is out of bounds of the provided rom. + throw toml::parse_error("Functio is out of bounds of the provided rom", func_el.source()); + } + + // Get the function's words from the rom. 
+ cur_func.words.reserve(func_size / sizeof(uint32_t)); + for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) { + cur_func.words.push_back(*reinterpret_cast(rom.data() + rom_addr)); + } + + section.function_addrs.push_back(cur_func.vram); + ret.functions_by_name[cur_func.name] = function_index; + ret.functions_by_vram[cur_func.vram].push_back(function_index); + ret.section_functions[section_index].push_back(function_index); + + ret.functions.emplace_back(std::move(cur_func)); + } + else { + throw toml::parse_error("Invalid function symbol entry", func_el.source()); + } + }); + + // Check if relocs exist for the section and read them if so. + const toml::node_view relocs_value = el["relocs"]; + if (relocs_value.is_array()) { + // Mark the section as relocatable, since it has relocs. + section.relocatable = true; + + // Read relocs for the section. + const toml::array* relocs_array = relocs_value.as_array(); + relocs_array->for_each([&ret, &rom, &section, section_index](auto&& reloc_el) { + if constexpr (toml::is_table) { + std::optional vram = reloc_el["vram"].template value(); + std::optional target_vram = reloc_el["target_vram"].template value(); + std::optional type_string = reloc_el["type"].template value(); + + if (!vram.has_value() || !target_vram.has_value() || !type_string.has_value()) { + throw toml::parse_error("Reloc entry missing required field(s)", reloc_el.source()); + } + + RelocType reloc_type = reloc_type_from_name(type_string.value()); + + // TODO also accept MIPS32 for TLB relocations. 
+ if (reloc_type != RelocType::R_MIPS_HI16 && reloc_type != RelocType::R_MIPS_LO16) { + throw toml::parse_error("Invalid reloc entry type", reloc_el.source()); + } + + Reloc cur_reloc{}; + cur_reloc.address = vram.value(); + cur_reloc.target_address = target_vram.value(); + cur_reloc.symbol_index = (uint32_t)-1; + cur_reloc.target_section = section_index; + cur_reloc.type = reloc_type; + + section.relocs.emplace_back(cur_reloc); + } + else { + throw toml::parse_error("Invalid reloc entry", reloc_el.source()); + } + }); + } + else { + section.relocatable = false; + } + } else { + throw toml::parse_error("Invalid section entry", el.source()); + } + }); + } + catch (const toml::parse_error& err) { + std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; + return false; + } + + ret.rom = std::move(rom); + out = std::move(ret); + return true; +} diff --git a/src/main.cpp b/src/main.cpp index 6cb8fd2..00f6d9e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1018,7 +1018,6 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co reloc_out.address = rel_offset; reloc_out.symbol_index = rel_symbol; reloc_out.type = static_cast(rel_type); - reloc_out.needs_relocation = false; std::string rel_symbol_name; ELFIO::Elf64_Addr rel_symbol_value; @@ -1033,12 +1032,6 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co reloc_out.target_section = rel_symbol_section_index; - bool rel_needs_relocation = false; - - if (rel_symbol_section_index < context.sections.size()) { - rel_needs_relocation = context.sections[rel_symbol_section_index].relocatable; - } - // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf) if (reloc_out.type == RecompPort::RelocType::R_MIPS_LO16) { if (prev_hi) { @@ -1212,6 +1205,79 @@ bool recompile_single_function(const RecompPort::Context& context, const 
RecompP return true; } +std::vector reloc_names { + "R_MIPS_NONE ", + "R_MIPS_16", + "R_MIPS_32", + "R_MIPS_REL32", + "R_MIPS_26", + "R_MIPS_HI16", + "R_MIPS_LO16", + "R_MIPS_GPREL16", +}; + +void dump_context(const RecompPort::Context& context, const std::filesystem::path& path) { + std::ofstream context_file {path}; + + for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { + const RecompPort::Section& section = context.sections[section_index]; + const std::vector& section_funcs = context.section_functions[section_index]; + if (!section_funcs.empty()) { + fmt::print(context_file, + "# Autogenerated from an ELF via N64Recomp\n" + "[[section]]\n" + "name = \"{}\"\n" + "rom = 0x{:08X}\n" + "vram = 0x{:08X}\n" + "size = 0x{:X}\n" + "\n", + section.name, section.rom_addr, section.ram_addr, section.size); + + if (!section.relocs.empty()) { + fmt::print(context_file, "relocs = [\n"); + + for (const RecompPort::Reloc& reloc : section.relocs) { + if (reloc.target_section == section_index || reloc.target_section == section.bss_section_index) { + // TODO allow MIPS32 relocs for TLB mapping support. 
+ if (reloc.type == RecompPort::RelocType::R_MIPS_HI16 || reloc.type == RecompPort::RelocType::R_MIPS_LO16) { + fmt::print(context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n", + reloc_names[static_cast(reloc.type)], reloc.address, reloc.target_address); + } + } + } + + fmt::print(context_file, "]\n\n"); + } + + fmt::print(context_file, "functions = [\n"); + + for (const size_t& function_index : section_funcs) { + const RecompPort::Function& func = context.functions[function_index]; + fmt::print(context_file, " {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n", + func.name, func.vram, func.words.size() * sizeof(func.words[0])); + } + + fmt::print(context_file, "]\n\n"); + } + } +} + +static std::vector read_file(const std::filesystem::path& path) { + std::vector ret; + + std::ifstream file{ path, std::ios::binary}; + + if (file.good()) { + file.seekg(0, std::ios::end); + ret.resize(file.tellg()); + file.seekg(0, std::ios::beg); + + file.read(reinterpret_cast(ret.data()), ret.size()); + } + + return ret; +} + int main(int argc, char** argv) { auto exit_failure = [] (const std::string& error_str) { fmt::vprint(stderr, error_str, fmt::make_format_args()); @@ -1230,7 +1296,6 @@ int main(int argc, char** argv) { exit_failure(fmt::format("Failed to load config file: {}\n", config_path)); } - ELFIO::elfio elf_file; RabbitizerConfig_Cfg.pseudos.pseudoMove = false; RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false; RabbitizerConfig_Cfg.pseudos.pseudoBnez = false; @@ -1248,52 +1313,118 @@ int main(int argc, char** argv) { std::unordered_set relocatable_sections{}; relocatable_sections.insert(relocatable_sections_ordered.begin(), relocatable_sections_ordered.end()); - if (!elf_file.load(config.elf_path.string())) { - exit_failure("Failed to load provided elf file\n"); + RecompPort::Context context{}; + + if (!config.elf_path.empty() && !config.symbols_file_path.empty()) { + exit_failure("Config file cannot provide both an elf and a symbols 
file\n"); } - if (elf_file.get_class() != ELFIO::ELFCLASS32) { - exit_failure("Incorrect elf class\n"); + // Build a context from the provided elf file. + if (!config.elf_path.empty()) { + ELFIO::elfio elf_file; + + if (!elf_file.load(config.elf_path.string())) { + exit_failure("Failed to load provided elf file\n"); + } + + if (elf_file.get_class() != ELFIO::ELFCLASS32) { + exit_failure("Incorrect elf class\n"); + } + + if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) { + exit_failure("Incorrect endianness\n"); + } + + context = { elf_file }; + context.relocatable_sections = std::move(relocatable_sections); + + // Read all of the sections in the elf and look for the symbol table section + ELFIO::section* symtab_section = read_sections(context, config, elf_file); + + // Search the sections to see if any are overlays or TLB-mapped + analyze_sections(context, elf_file); + + // If no symbol table was found then exit + if (symtab_section == nullptr) { + exit_failure("No symbol table section found\n"); + } + + // Manually sized functions + for (const auto& func_size : config.manual_func_sizes) { + context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); + } + + // Read all of the symbols in the elf and look for the entrypoint function + bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols); + + // Add any manual functions + add_manual_functions(context, elf_file, config.manual_functions); + + if (config.has_entrypoint && !found_entrypoint_func) { + exit_failure("Could not find entrypoint function\n"); + } + } + // Build a context from the provided symbols file. 
+ else if (!config.symbols_file_path.empty()) { + if (config.rom_file_path.empty()) { + exit_failure("A ROM file must be provided when using a symbols file\n"); + } + + std::vector rom = read_file(config.rom_file_path); + if (rom.empty()) { + exit_failure("Failed to load ROM file: " + config.rom_file_path.string() + "\n"); + } + + if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context)) { + exit_failure("Failed to load symbols file\n"); + } + + auto rename_function = [&context](size_t func_index, const std::string& new_name) { + RecompPort::Function& func = context.functions[func_index]; + + context.functions_by_name.erase(func.name); + func.name = new_name; + context.functions_by_name[func.name] = func_index; + }; + + for (size_t func_index = 0; func_index < context.functions.size(); func_index++) { + RecompPort::Function& func = context.functions[func_index]; + if (reimplemented_funcs.contains(func.name)) { + rename_function(func_index, func.name + "_recomp"); + func.reimplemented = true; + func.ignored = true; + } else if (ignored_funcs.contains(func.name)) { + rename_function(func_index, func.name + "_recomp"); + func.ignored = true; + } else if (renamed_funcs.contains(func.name)) { + rename_function(func_index, func.name + "_recomp"); + func.ignored = false; + } + } + + + if (config.has_entrypoint) { + bool found_entrypoint = false; + + for (uint32_t func_index : context.functions_by_vram[config.entrypoint]) { + auto& func = context.functions[func_index]; + if (func.rom == 0x1000) { + rename_function(func_index, "recomp_entrypoint"); + found_entrypoint = true; + break; + } + } + + if (!found_entrypoint) { + exit_failure("No entrypoint provided in symbol file\n"); + } + } + + } + else { + exit_failure("Config file must provide either an elf or a symbols file\n"); } - if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) { - exit_failure("Incorrect endianness\n"); - } - - RecompPort::Context context{ elf_file }; - 
context.relocatable_sections = std::move(relocatable_sections); - - // Read all of the sections in the elf and look for the symbol table section - ELFIO::section* symtab_section = read_sections(context, config, elf_file); - - // Search the sections to see if any are overlays or TLB-mapped - analyze_sections(context, elf_file); - - // If no symbol table was found then exit - if (symtab_section == nullptr) { - exit_failure("No symbol table section found\n"); - } - - // Functions that weren't declared properly and thus have no size in the elf - //context.manually_sized_funcs.emplace("guMtxF2L", 0x64); - //context.manually_sized_funcs.emplace("guScaleF", 0x48); - //context.manually_sized_funcs.emplace("guTranslateF", 0x48); - //context.manually_sized_funcs.emplace("guMtxIdentF", 0x48); - //context.manually_sized_funcs.emplace("sqrtf", 0x8); - //context.manually_sized_funcs.emplace("guMtxIdent", 0x4C); - for (const auto& func_size : config.manual_func_sizes) { - context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); - } - - // Read all of the symbols in the elf and look for the entrypoint function - bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols); - - // Add any manual functions - add_manual_functions(context, elf_file, config.manual_functions); - - if (config.has_entrypoint && !found_entrypoint_func) { - exit_failure("Could not find entrypoint function\n"); - } fmt::print("Function count: {}\n", context.functions.size()); @@ -1312,6 +1443,11 @@ int main(int argc, char** argv) { std::vector> static_funcs_by_section{ context.sections.size() }; + // TODO expose a way to dump the context from the command line. Make sure not to rename functions when doing so. 
+ //fmt::print("Dumping context\n"); + //dump_context(context, "dump.toml"); + //return 0; + fmt::print("Working dir: {}\n", std::filesystem::current_path().string()); // Stub out any functions specified in the config file. From 94b59d56f70228c3d3e3062cf1b1ab6ce42153c0 Mon Sep 17 00:00:00 2001 From: Anghelo Carvajal Date: Fri, 17 May 2024 22:38:19 -0400 Subject: [PATCH 2/4] Mask the lower 3 bits of the `de` RSP operand (#55) --- RSPRecomp/src/rsp_recomp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index c3c35b8..dd8bf74 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -309,7 +309,7 @@ bool process_instruction(size_t instr_index, const std::vector Date: Fri, 31 May 2024 22:48:42 +0200 Subject: [PATCH 3/4] CI: Add Linux arm64 (#69) --- .github/workflows/validate.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 85723cb..82ce7d9 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -14,7 +14,9 @@ jobs: strategy: matrix: type: [ Debug, Release ] - os: [ ubuntu-latest, windows-latest, macos-13, macos-14 ] # macOS 13 is intel and macOS 14 is arm + # macos-13 is intel, macos-14 is arm, blaze/ubuntu-22.04 is arm + os: [ ubuntu-latest, windows-latest, macos-13, macos-14, blaze/ubuntu-22.04 ] + name: ${{ matrix.os }} (${{ (matrix.os == 'macos-14' || matrix.os == 'blaze/ubuntu-22.04') && 'arm64' || 'x64' }}, ${{ matrix.type }}) steps: - name: Checkout uses: actions/checkout@v3 @@ -23,7 +25,7 @@ jobs: - name: ccache uses: hendrikmuhs/ccache-action@v1.2 with: - key: ${{ runner.os }}-N64Recomp-ccache + key: ${{ matrix.os }}-N64Recomp-ccache-${{ matrix.type }} - name: Install Windows Dependencies if: runner.os == 'Windows' run: | @@ -47,13 +49,14 @@ jobs: # enable ccache export 
PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S . -B cmake-build - cmake --build cmake-build --config Debug --target N64Recomp -j 8 + cmake -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S . -B cmake-build + cmake --build cmake-build --config ${{ matrix.type }} --target N64Recomp -j $(nproc) - name: Build N64Recomp (Windows) if: runner.os == 'Windows' run: |- # enable ccache set $env:PATH="$env:USERPROFILE/.cargo/bin;$env:PATH" + $cpuCores = (Get-CimInstance -ClassName Win32_Processor).NumberOfLogicalProcessors cmake -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_MAKE_PROGRAM=ninja -G Ninja -S . -B cmake-build - cmake --build cmake-build --config Debug --target N64Recomp -j 8 + cmake --build cmake-build --config ${{ matrix.type }} --target N64Recomp -j $cpuCores From 6eb7d5bd3ee7f0b79f3fd7adbe931dccbacf7e1b Mon Sep 17 00:00:00 2001 From: Gilles Siberlin Date: Sat, 1 Jun 2024 05:31:50 +0200 Subject: [PATCH 4/4] Implement hook insertion (#73) * Implement function hook insertion * Fix recompiled code indentation * Add _matherr to renamed_funcs * Replace after_vram by before_vram * Emit dummy value if relocatable_sections_ordered is empty --- RSPRecomp/src/rsp_recomp.cpp | 26 +- include/recomp_port.h | 8 + src/analysis.cpp | 476 +++++++++++++++++------------------ src/config.cpp | 151 +++++++---- src/main.cpp | 62 ++++- src/recompilation.cpp | 47 ++-- 6 files changed, 440 insertions(+), 330 deletions(-) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index dd8bf74..38b914a 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -564,25 +564,25 @@ struct RSPRecompilerConfig { }; std::filesystem::path 
concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { - if (!child.empty()) { - return parent / child; - } - return child; + if (!child.empty()) { + return parent / child; + } + return child; } template std::vector toml_to_vec(const toml::array* array) { - std::vector ret; + std::vector ret; - // Reserve room for all the funcs in the map. - ret.reserve(array->size()); + // Reserve room for all the funcs in the map. + ret.reserve(array->size()); array->for_each([&ret](auto&& el) { if constexpr (toml::is_integer) { ret.push_back(*el); } }); - return ret; + return ret; } template @@ -601,9 +601,9 @@ std::unordered_set toml_to_set(const toml::array* array) { bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) { RSPRecompilerConfig ret{}; - try { + try { const toml::table config_data = toml::parse_file(config_path.u8string()); - std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path(); + std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path(); std::optional text_offset = config_data["text_offset"].value(); if (text_offset.has_value()) { @@ -653,20 +653,20 @@ bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& throw toml::parse_error("Missing output_function_name in config file", config_data.source()); } - // Extra indirect branch targets (optional) + // Extra indirect branch targets (optional) const toml::node_view branch_targets_data = config_data["extra_indirect_branch_targets"]; if (branch_targets_data.is_array()) { const toml::array* branch_targets_array = branch_targets_data.as_array(); ret.extra_indirect_branch_targets = toml_to_vec(branch_targets_array); } - // Unsupported_instructions (optional) + // Unsupported_instructions (optional) const toml::node_view unsupported_instructions_data = config_data["unsupported_instructions"]; if (unsupported_instructions_data.is_array()) { const toml::array* 
unsupported_instructions_array = unsupported_instructions_data.as_array(); ret.unsupported_instructions = toml_to_set(unsupported_instructions_array); } - } + } catch (const toml::parse_error& err) { std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; return false; diff --git a/include/recomp_port.h b/include/recomp_port.h index a74a8b5..3828f01 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -40,6 +40,12 @@ namespace RecompPort { uint32_t value; }; + struct FunctionHook { + std::string func_name; + int32_t before_vram; + std::string text; + }; + struct FunctionSize { std::string func_name; uint32_t size_bytes; @@ -71,6 +77,7 @@ namespace RecompPort { std::vector ignored_funcs; DeclaredFunctionMap declared_funcs; std::vector instruction_patches; + std::vector function_hooks; std::vector manual_func_sizes; std::vector manual_functions; std::string bss_section_suffix; @@ -110,6 +117,7 @@ namespace RecompPort { bool ignored; bool reimplemented; bool stubbed; + std::unordered_map function_hooks; Function(uint32_t vram, uint32_t rom, std::vector words, std::string name, ELFIO::Elf_Half section_index, bool ignored = false, bool reimplemented = false, bool stubbed = false) : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {} diff --git a/src/analysis.cpp b/src/analysis.cpp index f98b737..5a689a0 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ -10,271 +10,271 @@ extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue); // If 64-bit addressing is ever implemented, these will need to be changed to 64-bit values struct RegState { - // For tracking a register that will be used to load from RAM - uint32_t prev_lui; - uint32_t prev_addiu_vram; - uint32_t prev_addu_vram; - uint8_t prev_addend_reg; - bool valid_lui; - bool valid_addiu; - 
bool valid_addend; - // For tracking a register that has been loaded from RAM - uint32_t loaded_lw_vram; - uint32_t loaded_addu_vram; - uint32_t loaded_address; - uint8_t loaded_addend_reg; - bool valid_loaded; + // For tracking a register that will be used to load from RAM + uint32_t prev_lui; + uint32_t prev_addiu_vram; + uint32_t prev_addu_vram; + uint8_t prev_addend_reg; + bool valid_lui; + bool valid_addiu; + bool valid_addend; + // For tracking a register that has been loaded from RAM + uint32_t loaded_lw_vram; + uint32_t loaded_addu_vram; + uint32_t loaded_address; + uint8_t loaded_addend_reg; + bool valid_loaded; - RegState() = default; + RegState() = default; - void invalidate() { - prev_lui = 0; - prev_addiu_vram = 0; - prev_addu_vram = 0; - prev_addend_reg = 0; + void invalidate() { + prev_lui = 0; + prev_addiu_vram = 0; + prev_addu_vram = 0; + prev_addend_reg = 0; - valid_lui = false; - valid_addiu = false; - valid_addend = false; + valid_lui = false; + valid_addiu = false; + valid_addend = false; - loaded_lw_vram = 0; - loaded_addu_vram = 0; - loaded_address = 0; - loaded_addend_reg = 0; + loaded_lw_vram = 0; + loaded_addu_vram = 0; + loaded_address = 0; + loaded_addend_reg = 0; - valid_loaded = false; - } + valid_loaded = false; + } }; using InstrId = rabbitizer::InstrId::UniqueId; using RegId = rabbitizer::Registers::Cpu::GprO32; bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPort::Function& func, RecompPort::FunctionStats& stats, - RegState reg_states[32], std::vector& stack_states) { - // Temporary register state for tracking the register being operated on - RegState temp{}; + RegState reg_states[32], std::vector& stack_states) { + // Temporary register state for tracking the register being operated on + RegState temp{}; - int rd = (int)instr.GetO32_rd(); - int rs = (int)instr.GetO32_rs(); - int base = rs; - int rt = (int)instr.GetO32_rt(); - int sa = (int)instr.Get_sa(); + int rd = (int)instr.GetO32_rd(); + int rs = 
(int)instr.GetO32_rs(); + int base = rs; + int rt = (int)instr.GetO32_rt(); + int sa = (int)instr.Get_sa(); - uint16_t imm = instr.Get_immediate(); + uint16_t imm = instr.Get_immediate(); - auto check_move = [&]() { - if (rs == 0) { - // rs is zero so copy rt to rd - reg_states[rd] = reg_states[rt]; - } else if (rt == 0) { - // rt is zero so copy rs to rd - reg_states[rd] = reg_states[rs]; - } else { - // Not a move, invalidate rd - reg_states[rd].invalidate(); - } - }; + auto check_move = [&]() { + if (rs == 0) { + // rs is zero so copy rt to rd + reg_states[rd] = reg_states[rt]; + } else if (rt == 0) { + // rt is zero so copy rs to rd + reg_states[rd] = reg_states[rs]; + } else { + // Not a move, invalidate rd + reg_states[rd].invalidate(); + } + }; - switch (instr.getUniqueId()) { - case InstrId::cpu_lui: - // rt has been completely overwritten, so invalidate it - reg_states[rt].invalidate(); - reg_states[rt].prev_lui = (int16_t)imm << 16; - reg_states[rt].valid_lui = true; - break; - case InstrId::cpu_addiu: - // The target reg is a copy of the source reg plus an immediate, so copy the source reg's state - reg_states[rt] = reg_states[rs]; - // Set the addiu state if and only if there hasn't been an addiu already - if (!reg_states[rt].valid_addiu) { - reg_states[rt].prev_addiu_vram = (int16_t)imm; - reg_states[rt].valid_addiu = true; - } else { - // Otherwise, there have been 2 or more consecutive addius so invalidate the whole register - reg_states[rt].invalidate(); - } - break; - case InstrId::cpu_addu: - // rd has been completely overwritten, so invalidate it - temp.invalidate(); - // Exactly one of the two addend register states should have a valid lui at this time - if (reg_states[rs].valid_lui != reg_states[rt].valid_lui) { - // Track which of the two registers has the valid lui state and which is the addend - int valid_lui_reg = reg_states[rs].valid_lui ? rs : rt; - int addend_reg = reg_states[rs].valid_lui ? 
rt : rs; + switch (instr.getUniqueId()) { + case InstrId::cpu_lui: + // rt has been completely overwritten, so invalidate it + reg_states[rt].invalidate(); + reg_states[rt].prev_lui = (int16_t)imm << 16; + reg_states[rt].valid_lui = true; + break; + case InstrId::cpu_addiu: + // The target reg is a copy of the source reg plus an immediate, so copy the source reg's state + reg_states[rt] = reg_states[rs]; + // Set the addiu state if and only if there hasn't been an addiu already + if (!reg_states[rt].valid_addiu) { + reg_states[rt].prev_addiu_vram = (int16_t)imm; + reg_states[rt].valid_addiu = true; + } else { + // Otherwise, there have been 2 or more consecutive addius so invalidate the whole register + reg_states[rt].invalidate(); + } + break; + case InstrId::cpu_addu: + // rd has been completely overwritten, so invalidate it + temp.invalidate(); + // Exactly one of the two addend register states should have a valid lui at this time + if (reg_states[rs].valid_lui != reg_states[rt].valid_lui) { + // Track which of the two registers has the valid lui state and which is the addend + int valid_lui_reg = reg_states[rs].valid_lui ? rs : rt; + int addend_reg = reg_states[rs].valid_lui ? 
rt : rs; - // Copy the lui reg's state into the destination reg, then set the destination reg's addend to the other operand - temp = reg_states[valid_lui_reg]; - temp.valid_addend = true; - temp.prev_addend_reg = addend_reg; - temp.prev_addu_vram = instr.getVram(); - } else { - // Check if this is a move - check_move(); - } - reg_states[rd] = temp; - break; - case InstrId::cpu_daddu: - case InstrId::cpu_or: - check_move(); - break; - case InstrId::cpu_sw: - // If this is a store to the stack, copy the state of rt into the stack at the given offset - if (base == (int)RegId::GPR_O32_sp) { - if ((imm & 0b11) != 0) { - fmt::print(stderr, "Invalid alignment on offset for sw to stack: {}\n", (int16_t)imm); - return false; - } - if (((int16_t)imm) < 0) { - fmt::print(stderr, "Negative offset for sw to stack: {}\n", (int16_t)imm); - return false; - } - size_t stack_offset = imm / 4; - if (stack_offset >= stack_states.size()) { - stack_states.resize(stack_offset + 1); - } - stack_states[stack_offset] = reg_states[rt]; - } - break; - case InstrId::cpu_lw: - // rt has been completely overwritten, so invalidate it - temp.invalidate(); - // If this is a load from the stack, copy the state of the stack at the given offset to rt - if (base == (int)RegId::GPR_O32_sp) { - if ((imm & 0b11) != 0) { - fmt::print(stderr, "Invalid alignment on offset for lw from stack: {}\n", (int16_t)imm); - return false; - } - if (((int16_t)imm) < 0) { - fmt::print(stderr, "Negative offset for lw from stack: {}\n", (int16_t)imm); - return false; - } - size_t stack_offset = imm / 4; - if (stack_offset >= stack_states.size()) { - stack_states.resize(stack_offset + 1); - } - temp = stack_states[stack_offset]; - } - // If the base register has a valid lui state and a valid addend before this, then this may be a load from a jump table - else if (reg_states[base].valid_lui && reg_states[base].valid_addend) { - // Exactly one of the lw and the base reg should have a valid lo16 value - bool nonzero_immediate 
= imm != 0; - if (nonzero_immediate != reg_states[base].valid_addiu) { - uint32_t lo16; - if (nonzero_immediate) { - lo16 = (int16_t)imm; - } else { - lo16 = reg_states[base].prev_addiu_vram; - } + // Copy the lui reg's state into the destination reg, then set the destination reg's addend to the other operand + temp = reg_states[valid_lui_reg]; + temp.valid_addend = true; + temp.prev_addend_reg = addend_reg; + temp.prev_addu_vram = instr.getVram(); + } else { + // Check if this is a move + check_move(); + } + reg_states[rd] = temp; + break; + case InstrId::cpu_daddu: + case InstrId::cpu_or: + check_move(); + break; + case InstrId::cpu_sw: + // If this is a store to the stack, copy the state of rt into the stack at the given offset + if (base == (int)RegId::GPR_O32_sp) { + if ((imm & 0b11) != 0) { + fmt::print(stderr, "Invalid alignment on offset for sw to stack: {}\n", (int16_t)imm); + return false; + } + if (((int16_t)imm) < 0) { + fmt::print(stderr, "Negative offset for sw to stack: {}\n", (int16_t)imm); + return false; + } + size_t stack_offset = imm / 4; + if (stack_offset >= stack_states.size()) { + stack_states.resize(stack_offset + 1); + } + stack_states[stack_offset] = reg_states[rt]; + } + break; + case InstrId::cpu_lw: + // rt has been completely overwritten, so invalidate it + temp.invalidate(); + // If this is a load from the stack, copy the state of the stack at the given offset to rt + if (base == (int)RegId::GPR_O32_sp) { + if ((imm & 0b11) != 0) { + fmt::print(stderr, "Invalid alignment on offset for lw from stack: {}\n", (int16_t)imm); + return false; + } + if (((int16_t)imm) < 0) { + fmt::print(stderr, "Negative offset for lw from stack: {}\n", (int16_t)imm); + return false; + } + size_t stack_offset = imm / 4; + if (stack_offset >= stack_states.size()) { + stack_states.resize(stack_offset + 1); + } + temp = stack_states[stack_offset]; + } + // If the base register has a valid lui state and a valid addend before this, then this may be a load from 
a jump table + else if (reg_states[base].valid_lui && reg_states[base].valid_addend) { + // Exactly one of the lw and the base reg should have a valid lo16 value + bool nonzero_immediate = imm != 0; + if (nonzero_immediate != reg_states[base].valid_addiu) { + uint32_t lo16; + if (nonzero_immediate) { + lo16 = (int16_t)imm; + } else { + lo16 = reg_states[base].prev_addiu_vram; + } - uint32_t address = reg_states[base].prev_lui + lo16; - temp.valid_loaded = true; - temp.loaded_lw_vram = instr.getVram(); - temp.loaded_address = address; - temp.loaded_addend_reg = reg_states[base].prev_addend_reg; - temp.loaded_addu_vram = reg_states[base].prev_addu_vram; - } - } - reg_states[rt] = temp; - break; - case InstrId::cpu_jr: - // Ignore jr $ra - if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { - break; - } - // Check if the source reg has a valid loaded state and if so record that as a jump table - if (reg_states[rs].valid_loaded) { - stats.jump_tables.emplace_back( - reg_states[rs].loaded_address, - reg_states[rs].loaded_addend_reg, - 0, - reg_states[rs].loaded_lw_vram, - reg_states[rs].loaded_addu_vram, - instr.getVram(), - std::vector{} - ); - } else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) { - uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui; - stats.absolute_jumps.emplace_back( - address, - instr.getVram() - ); - } - // Allow tail calls (TODO account for trailing nops due to bad function splits) - else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) { - // Inconclusive analysis - fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name); - return false; - } - break; - default: - if (instr.modifiesRd()) { - reg_states[rd].invalidate(); - } - if (instr.modifiesRt()) { - reg_states[rt].invalidate(); - } - break; - } - return 
true; + uint32_t address = reg_states[base].prev_lui + lo16; + temp.valid_loaded = true; + temp.loaded_lw_vram = instr.getVram(); + temp.loaded_address = address; + temp.loaded_addend_reg = reg_states[base].prev_addend_reg; + temp.loaded_addu_vram = reg_states[base].prev_addu_vram; + } + } + reg_states[rt] = temp; + break; + case InstrId::cpu_jr: + // Ignore jr $ra + if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { + break; + } + // Check if the source reg has a valid loaded state and if so record that as a jump table + if (reg_states[rs].valid_loaded) { + stats.jump_tables.emplace_back( + reg_states[rs].loaded_address, + reg_states[rs].loaded_addend_reg, + 0, + reg_states[rs].loaded_lw_vram, + reg_states[rs].loaded_addu_vram, + instr.getVram(), + std::vector{} + ); + } else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) { + uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui; + stats.absolute_jumps.emplace_back( + address, + instr.getVram() + ); + } + // Allow tail calls (TODO account for trailing nops due to bad function splits) + else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) { + // Inconclusive analysis + fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name); + return false; + } + break; + default: + if (instr.modifiesRd()) { + reg_states[rd].invalidate(); + } + if (instr.modifiesRt()) { + reg_states[rt].invalidate(); + } + break; + } + return true; } bool RecompPort::analyze_function(const RecompPort::Context& context, const RecompPort::Function& func, - const std::vector& instructions, RecompPort::FunctionStats& stats) { - // Create a state to track each register (r0 won't be used) - RegState reg_states[32] {}; - std::vector stack_states{}; + const std::vector& instructions, RecompPort::FunctionStats& stats) { + 
// Create a state to track each register (r0 won't be used) + RegState reg_states[32] {}; + std::vector stack_states{}; - // Look for jump tables - // A linear search through the func won't be accurate due to not taking control flow into account, but it'll work for finding jtables - for (const auto& instr : instructions) { - if (!analyze_instruction(instr, func, stats, reg_states, stack_states)) { - return false; - } - } + // Look for jump tables + // A linear search through the func won't be accurate due to not taking control flow into account, but it'll work for finding jtables + for (const auto& instr : instructions) { + if (!analyze_instruction(instr, func, stats, reg_states, stack_states)) { + return false; + } + } - // Sort jump tables by their address - std::sort(stats.jump_tables.begin(), stats.jump_tables.end(), - [](const JumpTable& a, const JumpTable& b) - { - return a.vram < b.vram; - }); + // Sort jump tables by their address + std::sort(stats.jump_tables.begin(), stats.jump_tables.end(), + [](const JumpTable& a, const JumpTable& b) + { + return a.vram < b.vram; + }); - // Determine jump table sizes - for (size_t i = 0; i < stats.jump_tables.size(); i++) { - JumpTable& cur_jtbl = stats.jump_tables[i]; - uint32_t end_address = (uint32_t)-1; - uint32_t entry_count = 0; - uint32_t vram = cur_jtbl.vram; + // Determine jump table sizes + for (size_t i = 0; i < stats.jump_tables.size(); i++) { + JumpTable& cur_jtbl = stats.jump_tables[i]; + uint32_t end_address = (uint32_t)-1; + uint32_t entry_count = 0; + uint32_t vram = cur_jtbl.vram; - if (i < stats.jump_tables.size() - 1) { - end_address = stats.jump_tables[i + 1].vram; - } + if (i < stats.jump_tables.size() - 1) { + end_address = stats.jump_tables[i + 1].vram; + } - // TODO this assumes that the jump table is in the same section as the function itself - cur_jtbl.rom = cur_jtbl.vram + func.rom - func.vram; + // TODO this assumes that the jump table is in the same section as the function itself + 
cur_jtbl.rom = cur_jtbl.vram + func.rom - func.vram; - while (vram < end_address) { - // Retrieve the current entry of the jump table - // TODO same as above - uint32_t rom_addr = vram + func.rom - func.vram; - uint32_t jtbl_word = byteswap(*reinterpret_cast(&context.rom[rom_addr])); - // Check if the entry is a valid address in the current function - if (jtbl_word < func.vram || jtbl_word > func.vram + func.words.size() * sizeof(func.words[0])) { - // If it's not then this is the end of the jump table - break; - } - cur_jtbl.entries.push_back(jtbl_word); - vram += 4; - } + while (vram < end_address) { + // Retrieve the current entry of the jump table + // TODO same as above + uint32_t rom_addr = vram + func.rom - func.vram; + uint32_t jtbl_word = byteswap(*reinterpret_cast(&context.rom[rom_addr])); + // Check if the entry is a valid address in the current function + if (jtbl_word < func.vram || jtbl_word > func.vram + func.words.size() * sizeof(func.words[0])) { + // If it's not then this is the end of the jump table + break; + } + cur_jtbl.entries.push_back(jtbl_word); + vram += 4; + } - if (cur_jtbl.entries.size() == 0) { - fmt::print("Failed to determine size of jump table at 0x{:08X} for instruction at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.jr_vram); - return false; - } + if (cur_jtbl.entries.size() == 0) { + fmt::print("Failed to determine size of jump table at 0x{:08X} for instruction at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.jr_vram); + return false; + } - //fmt::print("Jtbl at 0x{:08X} (rom 0x{:08X}) with {} entries used by instr at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.rom, cur_jtbl.entries.size(), cur_jtbl.jr_vram); - } + //fmt::print("Jtbl at 0x{:08X} (rom 0x{:08X}) with {} entries used by instr at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.rom, cur_jtbl.entries.size(), cur_jtbl.jr_vram); + } - return true; + return true; } diff --git a/src/config.cpp b/src/config.cpp index b22f9bf..e106ae6 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -5,10 +5,10 @@ 
#include "recomp_port.h" std::vector get_manual_funcs(const toml::array* manual_funcs_array) { - std::vector ret; + std::vector ret; - // Reserve room for all the funcs in the map. - ret.reserve(manual_funcs_array->size()); + // Reserve room for all the funcs in the map. + ret.reserve(manual_funcs_array->size()); manual_funcs_array->for_each([&ret](auto&& el) { if constexpr (toml::is_table) { std::optional func_name = el["name"].template value(); @@ -27,13 +27,13 @@ std::vector get_manual_funcs(const toml::array* manu } }); - return ret; + return ret; } std::vector get_stubbed_funcs(const toml::table* patches_data) { - std::vector stubbed_funcs{}; + std::vector stubbed_funcs{}; - // Check if the stubs array exists. + // Check if the stubs array exists. const toml::node_view stubs_data = (*patches_data)["stubs"]; if (stubs_data.is_array()) { @@ -53,13 +53,13 @@ std::vector get_stubbed_funcs(const toml::table* patches_data) { }); } - return stubbed_funcs; + return stubbed_funcs; } std::vector get_ignored_funcs(const toml::table* patches_data) { - std::vector ignored_funcs{}; + std::vector ignored_funcs{}; - // Check if the ignored funcs array exists. + // Check if the ignored funcs array exists. 
const toml::node_view ignored_funcs_data = (*patches_data)["ignored"]; if (ignored_funcs_data.is_array()) { @@ -76,16 +76,16 @@ std::vector get_ignored_funcs(const toml::table* patches_data) { }); } - return ignored_funcs; + return ignored_funcs; } std::unordered_map arg_type_map{ - {"u32", RecompPort::FunctionArgType::u32}, - {"s32", RecompPort::FunctionArgType::s32}, + {"u32", RecompPort::FunctionArgType::u32}, + {"s32", RecompPort::FunctionArgType::s32}, }; std::vector parse_args(const toml::array* args_in) { - std::vector ret(args_in->size()); + std::vector ret(args_in->size()); args_in->for_each([&ret](auto&& el) { if constexpr (toml::is_string) { @@ -104,13 +104,13 @@ std::vector parse_args(const toml::array* args_in) } }); - return ret; + return ret; } RecompPort::DeclaredFunctionMap get_declared_funcs(const toml::table* patches_data) { - RecompPort::DeclaredFunctionMap declared_funcs{}; + RecompPort::DeclaredFunctionMap declared_funcs{}; - // Check if the func array exists. + // Check if the func array exists. const toml::node_view funcs_data = (*patches_data)["func"]; if (funcs_data.is_array()) { @@ -138,13 +138,13 @@ RecompPort::DeclaredFunctionMap get_declared_funcs(const toml::table* patches_da }); } - return declared_funcs; + return declared_funcs; } std::vector get_func_sizes(const toml::table* patches_data) { - std::vector func_sizes{}; + std::vector func_sizes{}; - // Check if the func size array exists. + // Check if the func size array exists. const toml::node_view funcs_data = (*patches_data)["function_sizes"]; if (funcs_data.is_array()) { const toml::array* sizes_array = funcs_data.as_array(); @@ -177,13 +177,13 @@ std::vector get_func_sizes(const toml::table* patches_ }); } - return func_sizes; + return func_sizes; } std::vector get_instruction_patches(const toml::table* patches_data) { - std::vector ret; + std::vector ret; - // Check if the instruction patch array exists. + // Check if the instruction patch array exists. 
const toml::node_view insn_patch_data = (*patches_data)["instruction"]; if (insn_patch_data.is_array()) { @@ -221,20 +221,64 @@ std::vector get_instruction_patches(const toml::ta }); } - return ret; + return ret; +} + +std::vector get_function_hooks(const toml::table* patches_data) { + std::vector ret; + + // Check if the function hook array exists. + const toml::node_view func_hook_data = (*patches_data)["hook"]; + + if (func_hook_data.is_array()) { + const toml::array* func_hook_array = func_hook_data.as_array(); + ret.reserve(func_hook_array->size()); + + // Copy all the hooks into the output vector. + func_hook_array->for_each([&ret](auto&& el) { + if constexpr (toml::is_table) { + const toml::table& cur_hook = *el.as_table(); + + // Get the vram and make sure it's 4-byte aligned. + std::optional before_vram = cur_hook["before_vram"].value(); + std::optional func_name = cur_hook["func"].value(); + std::optional text = cur_hook["text"].value(); + + if (!func_name.has_value() || !text.has_value()) { + throw toml::parse_error("Function hook is missing required value(s)", el.source()); + } + + if (before_vram.has_value() && before_vram.value() & 0b11) { + // Not properly aligned, so throw an error (and make it look like a normal toml one). + throw toml::parse_error("before_vram is not word-aligned", el.source()); + } + + ret.push_back(RecompPort::FunctionHook{ + .func_name = func_name.value(), + .before_vram = before_vram.has_value() ? 
(int32_t)before_vram.value() : 0, + .text = text.value(), + }); + } + else { + throw toml::parse_error("Invalid function hook entry", el.source()); + } + }); + } + + return ret; } std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { - if (!child.empty()) { - return parent / child; - } - return child; + if (!child.empty()) { + return parent / child; + } + return child; } RecompPort::Config::Config(const char* path) { - // Start this config out as bad so that it has to finish parsing without errors to be good. - entrypoint = 0; - bad = true; + // Start this config out as bad so that it has to finish parsing without errors to be good. + entrypoint = 0; + bad = true; toml::table config_data{}; try { @@ -348,6 +392,9 @@ RecompPort::Config::Config(const char* path) { // Manual function sizes (optional) manual_func_sizes = get_func_sizes(table); + + // Function hooks (optional) + function_hooks = get_function_hooks(table); } } catch (const toml::parse_error& err) { @@ -355,34 +402,34 @@ RecompPort::Config::Config(const char* path) { return; } - // No errors occured, so mark this config file as good. - bad = false; + // No errors occurred, so mark this config file as good. 
+ bad = false; } const std::unordered_map reloc_type_name_map { - { "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE }, - { "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 }, - { "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 }, - { "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 }, - { "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 }, - { "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 }, - { "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 }, - { "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 }, + { "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE }, + { "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 }, + { "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 }, + { "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 }, + { "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 }, + { "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 }, + { "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 }, + { "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 }, }; RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) { - auto find_it = reloc_type_name_map.find(reloc_type_name); - if (find_it != reloc_type_name_map.end()) { - return find_it->second; - } - return RecompPort::RelocType::R_MIPS_NONE; + auto find_it = reloc_type_name_map.find(reloc_type_name); + if (find_it != reloc_type_name_map.end()) { + return find_it->second; + } + return RecompPort::RelocType::R_MIPS_NONE; } bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, RecompPort::Context& out) { - RecompPort::Context ret{}; + RecompPort::Context ret{}; - try { - const toml::table config_data = toml::parse_file(symbol_file_path.u8string()); + try { + const toml::table config_data = toml::parse_file(symbol_file_path.u8string()); const toml::node_view config_sections_value = config_data["section"]; if (!config_sections_value.is_array()) { @@ -518,13 +565,13 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& 
symbol_f throw toml::parse_error("Invalid section entry", el.source()); } }); - } + } catch (const toml::parse_error& err) { std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; return false; } - ret.rom = std::move(rom); - out = std::move(ret); - return true; + ret.rom = std::move(rom); + out = std::move(ret); + return true; } diff --git a/src/main.cpp b/src/main.cpp index 00f6d9e..8faae53 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -628,6 +628,7 @@ std::unordered_set renamed_funcs{ "div64_64", "div64_32", "__moddi3", + "_matherr", }; bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) { @@ -1499,6 +1500,41 @@ int main(int argc, char** argv) { func.words[instruction_index] = byteswap(patch.value); } + // Apply any function hooks. + for (const RecompPort::FunctionHook& patch : config.function_hooks) { + // Check if the specified function exists. + auto func_find = context.functions_by_name.find(patch.func_name); + if (func_find == context.functions_by_name.end()) { + // Function doesn't exist, present an error to the user instead of silently failing to apply the hook. + // This helps prevent typos in the config file or functions renamed between versions from causing issues. + exit_failure(fmt::format("Function {} has a function hook but does not exist!", patch.func_name)); + } + + RecompPort::Function& func = context.functions[func_find->second]; + int32_t func_vram = func.vram; + + // Check that the function actually contains this vram address. 
+ if (patch.before_vram < func_vram || patch.before_vram >= func_vram + func.words.size() * sizeof(func.words[0])) { + exit_failure(fmt::format("Function {} has a function hook for vram 0x{:08X} but doesn't contain that vram address!", patch.func_name, (uint32_t)patch.before_vram)); + } + + // No before_vram means this will be placed at the start of the function + size_t instruction_index = -1; + + // Calculate the instruction index. + if (patch.before_vram != 0) { + instruction_index = (static_cast(patch.before_vram) - func_vram) / sizeof(uint32_t); + } + + // Check if a function hook already exists for that instruction index. + auto hook_find = func.function_hooks.find(instruction_index); + if (hook_find != func.function_hooks.end()) { + exit_failure(fmt::format("Function {} already has a function hook for vram 0x{:08X}!", patch.func_name, (uint32_t)patch.before_vram)); + } + + func.function_hooks[instruction_index] = patch.text; + } + std::ofstream single_output_file; if (config.single_file_output) { @@ -1700,18 +1736,22 @@ int main(int argc, char** argv) { fmt::print(overlay_file, "static int overlay_sections_by_index[] = {{\n"); - for (const std::string& section : relocatable_sections_ordered) { - // Check if this is an empty overlay - if (section == "*") { - fmt::print(overlay_file, " -1,\n"); - } - else { - auto find_it = relocatable_section_indices.find(section); - if (find_it == relocatable_section_indices.end()) { - fmt::print(stderr, "Failed to find written section index of relocatable section: {}\n", section); - std::exit(EXIT_FAILURE); + if (relocatable_sections_ordered.empty()) { + fmt::print(overlay_file, " -1,\n"); + } else { + for (const std::string& section : relocatable_sections_ordered) { + // Check if this is an empty overlay + if (section == "*") { + fmt::print(overlay_file, " -1,\n"); + } + else { + auto find_it = relocatable_section_indices.find(section); + if (find_it == relocatable_section_indices.end()) { + fmt::print(stderr, "Failed to 
find written section index of relocatable section: {}\n", section); + std::exit(EXIT_FAILURE); + } + fmt::print(overlay_file, " {},\n", relocatable_section_indices[section]); } - fmt::print(overlay_file, " {},\n", relocatable_section_indices[section]); } } fmt::print(overlay_file, "}};\n"); diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 4b29725..e34ed3c 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -24,23 +24,33 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C const auto& instr = instructions[instr_index]; needs_link_branch = false; is_branch_likely = false; + uint32_t instr_vram = instr.getVram(); + + auto print_indent = [&]() { + fmt::print(output_file, " "); + }; + + auto hook_find = func.function_hooks.find(instr_index); + if (hook_find != func.function_hooks.end()) { + fmt::print(output_file, " {}\n", hook_find->second); + if (indent) { + print_indent(); + } + } // Output a comment with the original instruction if (instr.isBranch() || instr.getUniqueId() == InstrId::cpu_j) { - fmt::print(output_file, " // {}\n", instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric()))); + fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric()))); } else if (instr.getUniqueId() == InstrId::cpu_jal) { - fmt::print(output_file, " // {}\n", instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric()))); + fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric()))); } else { - fmt::print(output_file, " // {}\n", instr.disassemble(0)); + fmt::print(output_file, " // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0)); } - uint32_t instr_vram = instr.getVram(); - if (skipped_insns.contains(instr_vram)) { return true; } - bool at_reloc = false; bool reloc_handled = false; RecompPort::RelocType 
reloc_type = RecompPort::RelocType::R_MIPS_NONE; @@ -71,10 +81,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } } - auto print_indent = [&]() { - fmt::print(output_file, " "); - }; - auto print_line = [&](fmt::format_string fmt_str, Ts ...args) { print_indent(); fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...)); @@ -106,7 +112,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } }; - auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true) { + auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true, bool indent = false) { const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram); std::string jal_target_name; uint32_t section_vram_start = section.ram_addr; @@ -173,7 +179,11 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } } needs_link_branch = link_branch; - print_unconditional_branch("{}(rdram, ctx)", jal_target_name); + if (indent) { + print_unconditional_branch(" {}(rdram, ctx)", jal_target_name); + } else { + print_unconditional_branch("{}(rdram, ctx)", jal_target_name); + } return true; }; @@ -183,9 +193,9 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) { fmt::print(output_file, "{{\n "); fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target); - print_func_call(branch_target, false); - print_line("return"); - fmt::print(output_file, ";\n }}\n"); + print_func_call(branch_target, false, true); + print_line(" return"); + fmt::print(output_file, " }}\n"); return; } @@ -1103,7 +1113,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output " uint64_t hi = 0, lo = 0, result = 0;\n" " unsigned 
int rounding_mode = DEFAULT_ROUNDING_MODE;\n" - " int c1cs = 0; \n", // cop1 conditional signal + " int c1cs = 0;\n", // cop1 conditional signal func.name); // Skip analysis and recompilation of this function is stubbed. @@ -1112,6 +1122,11 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re std::set branch_labels; instructions.reserve(func.words.size()); + auto hook_find = func.function_hooks.find(-1); + if (hook_find != func.function_hooks.end()) { + fmt::print(output_file, " {}\n", hook_find->second); + } + // First pass, disassemble each instruction and collect branch labels uint32_t vram = func.vram; for (uint32_t word : func.words) {