diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index 4cdfd65..dc59325 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -623,7 +623,6 @@ std::vector toml_to_vec(const toml::value& branch_targets_data) { } bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) { - std::ifstream config_file {config_path}; RSPRecompilerConfig ret{}; try { diff --git a/include/recomp_port.h b/include/recomp_port.h index 9c4b11b..eb33de1 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -63,6 +63,8 @@ namespace RecompPort { bool single_file_output; bool use_absolute_symbols; std::filesystem::path elf_path; + std::filesystem::path symbols_file_path; + std::filesystem::path rom_file_path; std::filesystem::path output_func_path; std::filesystem::path relocatable_sections_path; std::vector stubbed_funcs; @@ -130,7 +132,6 @@ namespace RecompPort { uint32_t symbol_index; uint32_t target_section; RelocType type; - bool needs_relocation; }; struct Section { @@ -175,6 +176,10 @@ namespace RecompPort { rom.reserve(8 * 1024 * 1024); executable_section_count = 0; } + + static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, Context& out); + + Context() = default; }; bool analyze_function(const Context& context, const Function& function, const std::vector& instructions, FunctionStats& stats); diff --git a/src/analysis.cpp b/src/analysis.cpp index 5068d85..f98b737 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ -19,31 +19,31 @@ struct RegState { bool valid_addiu; bool valid_addend; // For tracking a register that has been loaded from RAM -uint32_t loaded_lw_vram; -uint32_t loaded_addu_vram; -uint32_t loaded_address; -uint8_t loaded_addend_reg; -bool valid_loaded; + uint32_t loaded_lw_vram; + uint32_t loaded_addu_vram; + uint32_t loaded_address; + uint8_t loaded_addend_reg; + bool valid_loaded; -RegState() = default; + RegState() = default; -void invalidate() { - prev_lui = 0; - prev_addiu_vram = 0; - prev_addu_vram = 0; - prev_addend_reg = 0; + void invalidate() { + prev_lui = 0; + prev_addiu_vram = 0; + prev_addu_vram = 0; + prev_addend_reg = 0; - valid_lui = false; - valid_addiu = false; - valid_addend = false; + valid_lui = false; + valid_addiu = false; + valid_addend = false; - loaded_lw_vram = 0; - loaded_addu_vram = 0; - loaded_address = 0; - loaded_addend_reg = 0; + loaded_lw_vram = 0; + loaded_addu_vram = 0; + loaded_address = 0; + loaded_addend_reg = 0; - valid_loaded = false; -} + valid_loaded = false; + } }; using InstrId = rabbitizer::InstrId::UniqueId; diff --git a/src/config.cpp b/src/config.cpp index dac6991..a65b71e 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -242,7 +242,15 @@ RecompPort::Config::Config(const char* path) { else { has_entrypoint = false; } - elf_path = concat_if_not_empty(basedir, toml::find(input_data, "elf_path")); + if (input_data.contains("elf_path")) { + elf_path = concat_if_not_empty(basedir, toml::find(input_data, "elf_path")); + } + if (input_data.contains("symbols_file_path")) { + symbols_file_path = concat_if_not_empty(basedir, toml::find(input_data, "symbols_file_path")); + } + if (input_data.contains("rom_file_path")) { + rom_file_path = concat_if_not_empty(basedir, toml::find(input_data, "rom_file_path")); + } output_func_path = concat_if_not_empty(basedir, toml::find(input_data, "output_func_path")); relocatable_sections_path = concat_if_not_empty(basedir, toml::find_or(input_data, "relocatable_sections_path", "")); uses_mips3_float_mode = toml::find_or(input_data, "uses_mips3_float_mode", false); @@ -295,3 +303,147 @@ RecompPort::Config::Config(const char* path) { // No errors occured, so mark this config file as good. bad = false; } + +const std::unordered_map reloc_type_name_map { + { "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE }, + { "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 }, + { "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 }, + { "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 }, + { "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 }, + { "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 }, + { "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 }, + { "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 }, +}; + +RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) { + auto find_it = reloc_type_name_map.find(reloc_type_name); + if (find_it != reloc_type_name_map.end()) { + return find_it->second; + } + return RecompPort::RelocType::R_MIPS_NONE; +} + +bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, RecompPort::Context& out) { + RecompPort::Context ret{}; + + try { + const toml::value config_data = toml::parse(symbol_file_path); + const toml::value config_sections_value = toml::find_or(config_data, "section", toml::value{}); + + if (config_sections_value.type() != toml::value_t::array) { + return false; + } + + const toml::array config_sections = config_sections_value.as_array(); + ret.section_functions.resize(config_sections.size()); + + for (const toml::value& section_value : config_sections) { + size_t section_index = ret.sections.size(); + + Section& section = ret.sections.emplace_back(Section{}); + section.rom_addr = toml::find(section_value, "rom"); + section.ram_addr = toml::find(section_value, "vram"); + section.size = toml::find(section_value, "size"); + section.name = toml::find(section_value, "name"); + section.executable = true; + + const toml::array& functions = toml::find(section_value, "functions"); + + // Read functions for the section. + for (const toml::value& function_value : functions) { + size_t function_index = ret.functions.size(); + + Function cur_func{}; + cur_func.name = toml::find(function_value, "name"); + cur_func.vram = toml::find(function_value, "vram"); + cur_func.rom = cur_func.vram - section.ram_addr + section.rom_addr; + cur_func.section_index = section_index; + + uint32_t func_size = toml::find(function_value, "size"); + + if (cur_func.vram & 0b11) { + // Function isn't word aligned in vram. + throw value_error(toml::detail::format_underline( + std::string{ std::source_location::current().function_name() } + ": function's vram address isn't word aligned!", { + {function_value.location(), ""} + }), function_value.location()); + } + + if (cur_func.rom & 0b11) { + // Function isn't word aligned in rom. + throw value_error(toml::detail::format_underline( + std::string{ std::source_location::current().function_name() } + ": function's rom address isn't word aligned!", { + {function_value.location(), ""} + }), function_value.location()); + } + + if (cur_func.rom + func_size > rom.size()) { + // Function is out of bounds of the provided rom. + throw value_error(toml::detail::format_underline( + std::string{ std::source_location::current().function_name() } + ": function is out of bounds of the provided rom!", { + {function_value.location(), ""} + }), function_value.location()); + } + + // Get the function's words from the rom. + cur_func.words.reserve(func_size / sizeof(uint32_t)); + for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) { + cur_func.words.push_back(*reinterpret_cast(rom.data() + rom_addr)); + } + + section.function_addrs.push_back(cur_func.vram); + ret.functions_by_name[cur_func.name] = function_index; + ret.functions_by_vram[cur_func.vram].push_back(function_index); + ret.section_functions[section_index].push_back(function_index); + + ret.functions.emplace_back(std::move(cur_func)); + } + + // Check if relocs exist for the section and read them if so. + const toml::value& relocs_value = toml::find_or(section_value, "relocs", toml::value{}); + if (relocs_value.type() == toml::value_t::array) { + // Mark the section as relocatable, since it has relocs. + section.relocatable = true; + + // Read relocs for the section. + for (const toml::value& reloc_value : relocs_value.as_array()) { + size_t reloc_index = ret.functions.size(); + + Reloc cur_reloc{}; + cur_reloc.address = toml::find(reloc_value, "vram"); + cur_reloc.target_address = toml::find(reloc_value, "target_vram"); + cur_reloc.symbol_index = (uint32_t)-1; + cur_reloc.target_section = section_index; + const std::string& reloc_type = toml::find(reloc_value, "type"); + cur_reloc.type = reloc_type_from_name(reloc_type); + + section.relocs.emplace_back(std::move(cur_reloc)); + } + } + else { + section.relocatable = false; + } + + } + } + catch (const toml::syntax_error& err) { + fmt::print(stderr, "Syntax error in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const toml::type_error& err) { + fmt::print(stderr, "Incorrect type in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const value_error& err) { + fmt::print(stderr, "Invalid value in config file on line {}, full error:\n{}\n", err.location().line(), err.what()); + return false; + } + catch (const std::out_of_range& err) { + fmt::print(stderr, "Missing value in config file, full error:\n{}\n", err.what()); + return false; + } + + ret.rom = std::move(rom); + out = std::move(ret); + return true; +} diff --git a/src/main.cpp b/src/main.cpp index 6cb8fd2..f48341c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1018,7 +1018,6 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co reloc_out.address = rel_offset; reloc_out.symbol_index = rel_symbol; reloc_out.type = static_cast(rel_type); - reloc_out.needs_relocation = false; std::string rel_symbol_name; ELFIO::Elf64_Addr rel_symbol_value; @@ -1033,12 +1032,6 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co reloc_out.target_section = rel_symbol_section_index; - bool rel_needs_relocation = false; - - if (rel_symbol_section_index < context.sections.size()) { - rel_needs_relocation = context.sections[rel_symbol_section_index].relocatable; - } - // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf) if (reloc_out.type == RecompPort::RelocType::R_MIPS_LO16) { if (prev_hi) { @@ -1212,6 +1205,79 @@ bool recompile_single_function(const RecompPort::Context& context, const RecompP return true; } +std::vector reloc_names { + "R_MIPS_NONE ", + "R_MIPS_16", + "R_MIPS_32", + "R_MIPS_REL32", + "R_MIPS_26", + "R_MIPS_HI16", + "R_MIPS_LO16", + "R_MIPS_GPREL16", +}; + +void dump_context(const RecompPort::Context& context, const std::filesystem::path& path) { + std::ofstream context_file {path}; + + for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { + const RecompPort::Section& section = context.sections[section_index]; + const std::vector& section_funcs = context.section_functions[section_index]; + if (!section_funcs.empty()) { + fmt::print(context_file, + "# Autogenerated from an ELF via N64Recomp\n" + "[[section]]\n" + "name = \"{}\"\n" + "rom = 0x{:08X}\n" + "vram = 0x{:08X}\n" + "size = 0x{:X}\n" + "\n", + section.name, section.rom_addr, section.ram_addr, section.size); + + if (!section.relocs.empty()) { + fmt::print(context_file, "relocs = [\n"); + + for (const RecompPort::Reloc& reloc : section.relocs) { + if (reloc.target_section == section_index || reloc.target_section == section.bss_section_index) { + // TODO allow MIPS32 relocs for TLB mapping support. + if (reloc.type == RecompPort::RelocType::R_MIPS_HI16 || reloc.type == RecompPort::RelocType::R_MIPS_LO16) { + fmt::print(context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n", + reloc_names[static_cast(reloc.type)], reloc.address, reloc.target_address); + } + } + } + + fmt::print(context_file, "]\n\n"); + } + + fmt::print(context_file, "functions = [\n"); + + for (const ELFIO::Elf_Xword& function_index : section_funcs) { + const RecompPort::Function& func = context.functions[function_index]; + fmt::print(context_file, " {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n", + func.name, func.vram, func.words.size() * sizeof(func.words[0])); + } + + fmt::print(context_file, "]\n\n"); + } + } +} + +static std::vector read_file(const std::filesystem::path& path) { + std::vector ret; + + std::ifstream file{ path, std::ios::binary}; + + if (file.good()) { + file.seekg(0, std::ios::end); + ret.resize(file.tellg()); + file.seekg(0, std::ios::beg); + + file.read(reinterpret_cast(ret.data()), ret.size()); + } + + return ret; +} + int main(int argc, char** argv) { auto exit_failure = [] (const std::string& error_str) { fmt::vprint(stderr, error_str, fmt::make_format_args()); @@ -1230,7 +1296,6 @@ int main(int argc, char** argv) { exit_failure(fmt::format("Failed to load config file: {}\n", config_path)); } - ELFIO::elfio elf_file; RabbitizerConfig_Cfg.pseudos.pseudoMove = false; RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false; RabbitizerConfig_Cfg.pseudos.pseudoBnez = false; @@ -1248,52 +1313,90 @@ int main(int argc, char** argv) { std::unordered_set relocatable_sections{}; relocatable_sections.insert(relocatable_sections_ordered.begin(), relocatable_sections_ordered.end()); - if (!elf_file.load(config.elf_path.string())) { - exit_failure("Failed to load provided elf file\n"); + RecompPort::Context context{}; + + if (!config.elf_path.empty() && !config.symbols_file_path.empty()) { + exit_failure("Config file cannot provide both an elf and a symbols file\n"); } - if (elf_file.get_class() != ELFIO::ELFCLASS32) { - exit_failure("Incorrect elf class\n"); + // Build a context from the provided elf file. + if (!config.elf_path.empty()) { + ELFIO::elfio elf_file; + + if (!elf_file.load(config.elf_path.string())) { + exit_failure("Failed to load provided elf file\n"); + } + + if (elf_file.get_class() != ELFIO::ELFCLASS32) { + exit_failure("Incorrect elf class\n"); + } + + if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) { + exit_failure("Incorrect endianness\n"); + } + + context = { elf_file }; + context.relocatable_sections = std::move(relocatable_sections); + + // Read all of the sections in the elf and look for the symbol table section + ELFIO::section* symtab_section = read_sections(context, config, elf_file); + + // Search the sections to see if any are overlays or TLB-mapped + analyze_sections(context, elf_file); + + // If no symbol table was found then exit + if (symtab_section == nullptr) { + exit_failure("No symbol table section found\n"); + } + + // Manually sized functions + for (const auto& func_size : config.manual_func_sizes) { + context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); + } + + // Read all of the symbols in the elf and look for the entrypoint function + bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols); + + // Add any manual functions + add_manual_functions(context, elf_file, config.manual_functions); + + if (config.has_entrypoint && !found_entrypoint_func) { + exit_failure("Could not find entrypoint function\n"); + } + } + // Build a context from the provided symbols file. + else if (!config.symbols_file_path.empty()) { + if (config.rom_file_path.empty()) { + exit_failure("A ROM file must be provided when using a symbols file\n"); + } + + std::vector rom = read_file(config.rom_file_path); + if (rom.empty()) { + exit_failure("Failed to load ROM file: " + config.rom_file_path.string() + "\n"); + } + + if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context)) { + exit_failure("Failed to load symbols file\n"); + } + + for (RecompPort::Function& func : context.functions) { + if (reimplemented_funcs.contains(func.name)) { + func.reimplemented = true; + func.name = func.name + "_recomp"; + func.ignored = true; + } else if (ignored_funcs.contains(func.name)) { + func.name = func.name + "_recomp"; + func.ignored = true; + } else if (renamed_funcs.contains(func.name)) { + func.name = func.name + "_recomp"; + func.ignored = false; + } + } + } + else { + exit_failure("Config file must provide either an elf or a symbols file\n"); } - if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) { - exit_failure("Incorrect endianness\n"); - } - - RecompPort::Context context{ elf_file }; - context.relocatable_sections = std::move(relocatable_sections); - - // Read all of the sections in the elf and look for the symbol table section - ELFIO::section* symtab_section = read_sections(context, config, elf_file); - - // Search the sections to see if any are overlays or TLB-mapped - analyze_sections(context, elf_file); - - // If no symbol table was found then exit - if (symtab_section == nullptr) { - exit_failure("No symbol table section found\n"); - } - - // Functions that weren't declared properly and thus have no size in the elf - //context.manually_sized_funcs.emplace("guMtxF2L", 0x64); - //context.manually_sized_funcs.emplace("guScaleF", 0x48); - //context.manually_sized_funcs.emplace("guTranslateF", 0x48); - //context.manually_sized_funcs.emplace("guMtxIdentF", 0x48); - //context.manually_sized_funcs.emplace("sqrtf", 0x8); - //context.manually_sized_funcs.emplace("guMtxIdent", 0x4C); - for (const auto& func_size : config.manual_func_sizes) { - context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); - } - - // Read all of the symbols in the elf and look for the entrypoint function - bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols); - - // Add any manual functions - add_manual_functions(context, elf_file, config.manual_functions); - - if (config.has_entrypoint && !found_entrypoint_func) { - exit_failure("Could not find entrypoint function\n"); - } fmt::print("Function count: {}\n", context.functions.size()); @@ -1312,6 +1415,11 @@ int main(int argc, char** argv) { std::vector> static_funcs_by_section{ context.sections.size() }; + // TODO expose a way to dump the context from the command line. Make sure not to rename functions when doing so. + //fmt::print("Dumping context\n"); + //dump_context(context, "dump.toml"); + //return 0; + fmt::print("Working dir: {}\n", std::filesystem::current_path().string()); // Stub out any functions specified in the config file.