From 916d16417ecf3ee70912bf8022d38e85763a7627 Mon Sep 17 00:00:00 2001 From: Ethan Lafrenais Date: Thu, 16 Jan 2025 00:32:29 -0500 Subject: [PATCH] RSPRecomp overlay support (#118) * RSPRecomp overlay support * Change overlay_slot.offset config to text_address --- RSPRecomp/src/rsp_recomp.cpp | 503 ++++++++++++++++++++++++++++++++--- 1 file changed, 471 insertions(+), 32 deletions(-) diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp index b634bd5..02f99ab 100644 --- a/RSPRecomp/src/rsp_recomp.cpp +++ b/RSPRecomp/src/rsp_recomp.cpp @@ -149,7 +149,7 @@ std::string_view c0_reg_write_action(int cop0_reg) { case Cop0Reg::RSP_COP0_SP_DRAM_ADDR: return "SET_DMA_DRAM"; case Cop0Reg::RSP_COP0_SP_MEM_ADDR: - return "SET_DMA_DMEM"; + return "SET_DMA_MEM"; case Cop0Reg::RSP_COP0_SP_RD_LEN: return "DO_DMA_READ"; case Cop0Reg::RSP_COP0_SP_WR_LEN: @@ -161,6 +161,10 @@ std::string_view c0_reg_write_action(int cop0_reg) { } +bool is_c0_reg_write_dma_read(int cop0_reg) { + return static_cast(cop0_reg) == Cop0Reg::RSP_COP0_SP_RD_LEN; +} + std::optional get_rsp_element(const rabbitizer::InstructionRsp& instr) { if (instr.hasOperand(rabbitizer::OperandType::rsp_vt_elementhigh)) { return instr.GetRsp_elementhigh(); @@ -193,7 +197,32 @@ BranchTargets get_branch_targets(const std::vector& return ret; } -bool process_instruction(size_t instr_index, const std::vector& instructions, std::ofstream& output_file, const BranchTargets& branch_targets, const std::unordered_set& unsupported_instructions, bool indent, bool in_delay_slot) { +struct ResumeTargets { + std::unordered_set non_delay_targets; + std::unordered_set delay_targets; +}; + +void get_overlay_swap_resume_targets(const std::vector& instrs, ResumeTargets& targets) { + bool is_delay_slot = false; + for (const auto& instr : instrs) { + InstrId instr_id = instr.getUniqueId(); + int rd = (int)instr.GetO32_rd(); + + if (instr_id == InstrId::rsp_mtc0 && is_c0_reg_write_dma_read(rd)) { + uint32_t vram = instr.getVram(); + + targets.non_delay_targets.insert(vram); + + if (is_delay_slot) { + targets.delay_targets.insert(vram); + } + } + + is_delay_slot = instr.hasDelaySlot(); + } +} + +bool process_instruction(size_t instr_index, const std::vector& instructions, std::ofstream& output_file, const BranchTargets& branch_targets, const std::unordered_set& unsupported_instructions, const ResumeTargets& resume_targets, bool has_overlays, bool indent, bool in_delay_slot) { const auto& instr = instructions[instr_index]; uint32_t instr_vram = instr.getVram(); @@ -236,7 +265,7 @@ bool process_instruction(size_t instr_index, const std::vector(fmt::format_string fmt_str, Ts&& ...args) { if (instr_index < instructions.size() - 1) { uint32_t next_vram = instr_vram + 4; - process_instruction(instr_index + 1, instructions, output_file, branch_targets, unsupported_instructions, false, true); + process_instruction(instr_index + 1, instructions, output_file, branch_targets, unsupported_instructions, resume_targets, has_overlays, false, true); } print_indent(); fmt::print(output_file, fmt_str, std::forward(args)...); @@ -247,7 +276,7 @@ bool process_instruction(size_t instr_index, const std::vector(args)...); @@ -508,8 +537,18 @@ bool process_instruction(size_t instr_index, const std::vectorresume_address = 0x{:04X};\n" + " ctx->resume_delay = {};\n" + " goto do_overlay_swap;\n" + " }}\n", + instr_vram, in_delay_slot ? "true" : "false"); + } if (!write_action.empty()) { - print_line("{}({}{})", write_action, ctx_gpr_prefix(rt), rt); \ + print_line("{}({}{})", write_action, ctx_gpr_prefix(rt), rt); } break; } @@ -520,6 +559,17 @@ bool process_instruction(size_t instr_index, const std::vectorr1 = r1; ctx->r2 = r2; ctx->r3 = r3; ctx->r4 = r4; ctx->r5 = r5; ctx->r6 = r6; ctx->r7 = r7;\n" + " ctx->r8 = r8; ctx->r9 = r9; ctx->r10 = r10; ctx->r11 = r11; ctx->r12 = r12; ctx->r13 = r13; ctx->r14 = r14; ctx->r15 = r15;\n" + " ctx->r16 = r16; ctx->r17 = r17; ctx->r18 = r18; ctx->r19 = r19; ctx->r20 = r20; ctx->r21 = r21; ctx->r22 = r22; ctx->r23 = r23;\n" + " ctx->r24 = r24; ctx->r25 = r25; ctx->r26 = r26; ctx->r27 = r27; ctx->r28 = r28; ctx->r29 = r29; ctx->r30 = r30; ctx->r31 = r31;\n" + " ctx->dma_mem_address = dma_mem_address;\n" + " ctx->dma_dram_address = dma_dram_address;\n" + " ctx->jump_target = jump_target;\n" + " ctx->rsp = rsp;\n" + " return RspExitReason::SwapOverlay;\n"); +} + #ifdef _MSC_VER inline uint32_t byteswap(uint32_t val) { return _byteswap_ulong(val); @@ -552,6 +616,16 @@ constexpr uint32_t byteswap(uint32_t val) { } #endif +struct RSPRecompilerOverlayConfig { + size_t offset; + size_t size; +}; + +struct RSPRecompilerOverlaySlotConfig { + size_t text_address; + std::vector overlays; +}; + struct RSPRecompilerConfig { size_t text_offset; size_t text_size; @@ -561,6 +635,7 @@ struct RSPRecompilerConfig { std::string output_function_name; std::vector extra_indirect_branch_targets; std::unordered_set unsupported_instructions; + std::vector overlay_slots; }; std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { @@ -666,6 +741,76 @@ bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& const toml::array* unsupported_instructions_array = unsupported_instructions_data.as_array(); ret.unsupported_instructions = toml_to_set(unsupported_instructions_array); } + + // Overlay slots (optional) + const toml::node_view overlay_slots = config_data["overlay_slots"]; + if (overlay_slots.is_array()) { + const toml::array* overlay_slots_array = overlay_slots.as_array(); + + int slot_idx = 0; + overlay_slots_array->for_each([&](toml::table slot){ + RSPRecompilerOverlaySlotConfig slot_config; + + std::optional text_address = slot["text_address"].value(); + if (text_address.has_value()) { + slot_config.text_address = text_address.value(); + } + else { + throw toml::parse_error( + fmt::format("Missing text_address in config file at overlay slot {}", slot_idx).c_str(), + config_data.source()); + } + + // Overlays per slot + const toml::node_view overlays = slot["overlays"]; + if (overlays.is_array()) { + const toml::array* overlay_array = overlays.as_array(); + + int overlay_idx = 0; + overlay_array->for_each([&](toml::table overlay){ + RSPRecompilerOverlayConfig overlay_config; + + std::optional offset = overlay["offset"].value(); + if (offset.has_value()) { + overlay_config.offset = offset.value(); + } + else { + throw toml::parse_error( + fmt::format("Missing offset in config file at overlay slot {} overlay {}", slot_idx, overlay_idx).c_str(), + config_data.source()); + } + + std::optional size = overlay["size"].value(); + if (size.has_value()) { + overlay_config.size = size.value(); + + if ((size.value() % sizeof(uint32_t)) != 0) { + throw toml::parse_error( + fmt::format("Overlay size must be a multiple of {} in config file at overlay slot {} overlay {}", sizeof(uint32_t), slot_idx, overlay_idx).c_str(), + config_data.source()); + } + } + else { + throw toml::parse_error( + fmt::format("Missing size in config file at overlay slot {} overlay {}", slot_idx, overlay_idx).c_str(), + config_data.source()); + } + + slot_config.overlays.push_back(overlay_config); + overlay_idx++; + }); + } + else { + throw toml::parse_error( + fmt::format("Missing overlays in config file at overlay slot {}", slot_idx).c_str(), + config_data.source()); + } + + ret.overlay_slots.push_back(slot_config); + slot_idx++; + }); + } + } catch (const toml::parse_error& err) { std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; @@ -676,6 +821,269 @@ bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& return true; } +struct FunctionPermutation { + std::vector instrs; + std::vector permutation; +}; + +struct Permutation { + std::vector instr_words; + std::vector permutation; +}; + +struct Overlay { + std::vector instr_words; +}; + +struct OverlaySlot { + uint32_t offset; + std::vector overlays; +}; + +bool next_permutation(const std::vector& option_lengths, std::vector& current) { + current[current.size() - 1] += 1; + + size_t i = current.size() - 1; + while (current[i] == option_lengths[i]) { + current[i] = 0; + if (i == 0) { + return false; + } + + current[i - 1] += 1; + i--; + } + + return true; +} + +void permute(const std::vector& base_words, const std::vector& overlay_slots, std::vector& permutations) { + auto current = std::vector(overlay_slots.size(), 0); + auto slot_options = std::vector(overlay_slots.size(), 0); + + for (size_t i = 0; i < overlay_slots.size(); i++) { + slot_options[i] = overlay_slots[i].overlays.size(); + } + + do { + Permutation permutation = { + .instr_words = std::vector(base_words), + .permutation = std::vector(current) + }; + + for (size_t i = 0; i < overlay_slots.size(); i++) { + const OverlaySlot &slot = overlay_slots[i]; + const Overlay &overlay = slot.overlays[current[i]]; + + uint32_t word_offset = slot.offset / sizeof(uint32_t); + + size_t size_needed = word_offset + overlay.instr_words.size(); + if (permutation.instr_words.size() < size_needed) { + permutation.instr_words.reserve(size_needed); + } + + std::copy(overlay.instr_words.begin(), overlay.instr_words.end(), permutation.instr_words.data() + word_offset); + } + + permutations.push_back(permutation); + } while (next_permutation(slot_options, current)); +} + +std::string make_permutation_string(const std::vector permutation) { + std::string str = ""; + + for (uint32_t opt : permutation) { + str += std::to_string(opt); + } + + return str; +} + +void create_overlay_swap_function(const std::string& function_name, std::ofstream& output_file, const std::vector& permutations, const RSPRecompilerConfig& config) { + // Includes and permutation protos + fmt::print(output_file, + "#include \n" + "#include \n\n" + "using RspUcodePermutationFunc = RspExitReason(uint8_t* rdram, RspContext* ctx);\n\n" + "RspExitReason {}(uint8_t* rdram, RspContext* ctx);\n", + config.output_function_name + "_initial"); + + for (const auto& permutation : permutations) { + fmt::print(output_file, "RspExitReason {}(uint8_t* rdram, RspContext* ctx);\n", + config.output_function_name + make_permutation_string(permutation.permutation)); + } + fmt::print(output_file, "\n"); + + // IMEM -> slot index mapping + fmt::print(output_file, + "static const std::map imemToSlot = {{\n"); + for (size_t i = 0; i < config.overlay_slots.size(); i++) { + const RSPRecompilerOverlaySlotConfig& slot = config.overlay_slots[i]; + + uint32_t imemAddress = slot.text_address & rsp_mem_mask; + fmt::print(output_file, " {{ 0x{:04X}, {} }},\n", + imemAddress, i); + } + fmt::print(output_file, "}};\n\n"); + + // ucode offset -> overlay index mapping (per slot) + fmt::print(output_file, + "static const std::vector> offsetToOverlay = {{\n"); + for (const auto& slot : config.overlay_slots) { + fmt::print(output_file, " {{\n"); + for (size_t i = 0; i < slot.overlays.size(); i++) { + const RSPRecompilerOverlayConfig& overlay = slot.overlays[i]; + + fmt::print(output_file, " {{ 0x{:04X}, {} }},\n", + overlay.offset, i); + } + fmt::print(output_file, " }},\n"); + } + fmt::print(output_file, "}};\n\n"); + + // Permutation function pointers + fmt::print(output_file, + "static RspUcodePermutationFunc* permutations[] = {{\n"); + for (const auto& permutation : permutations) { + fmt::print(output_file, " {},\n", + config.output_function_name + make_permutation_string(permutation.permutation)); + } + fmt::print(output_file, "}};\n\n"); + + // Main function + fmt::print(output_file, + "RspExitReason {}(uint8_t* rdram, uint32_t ucode_addr) {{\n" + " RspContext ctx{{}};\n", + config.output_function_name); + + std::string slots_init_str = ""; + for (size_t i = 0; i < config.overlay_slots.size(); i++) { + if (i > 0) { + slots_init_str += ", "; + } + + slots_init_str += "0"; + } + + fmt::print(output_file, " uint32_t slots[] = {{{}}};\n\n", + slots_init_str); + + fmt::print(output_file, " RspExitReason exitReason = {}(rdram, &ctx);\n\n", + config.output_function_name + "_initial"); + + fmt::print(output_file, ""); + + std::string perm_index_str = ""; + for (size_t i = 0; i < config.overlay_slots.size(); i++) { + if (i > 0) { + perm_index_str += " + "; + } + + uint32_t multiplier = 1; + for (size_t k = i + 1; k < config.overlay_slots.size(); k++) { + multiplier *= config.overlay_slots[k].overlays.size(); + } + + perm_index_str += fmt::format("slots[{}] * {}", i, multiplier); + } + + fmt::print(output_file, + " while (exitReason == RspExitReason::SwapOverlay) {{\n" + " uint32_t slot = imemToSlot.at(ctx.dma_mem_address);\n" + " uint32_t overlay = offsetToOverlay.at(slot).at(ctx.dma_dram_address - ucode_addr);\n" + " slots[slot] = overlay;\n" + "\n" + " RspUcodePermutationFunc* permutationFunc = permutations[{}];\n" + " exitReason = permutationFunc(rdram, &ctx);\n" + " }}\n\n" + " return exitReason;\n" + "}}\n\n", + perm_index_str); +} + +void create_function(const std::string& function_name, std::ofstream& output_file, const std::vector& instrs, const RSPRecompilerConfig& config, const ResumeTargets& resume_targets, bool is_permutation, bool is_initial) { + // Collect indirect jump targets (return addresses for linked jumps) + BranchTargets branch_targets = get_branch_targets(instrs); + + // Add any additional indirect branch targets that may not be found directly in the code (e.g. from a jump table) + for (uint32_t target : config.extra_indirect_branch_targets) { + branch_targets.indirect_targets.insert(target); + } + + // Write function + if (is_permutation) { + fmt::print(output_file, + "RspExitReason {}(uint8_t* rdram, RspContext* ctx) {{\n" + " uint32_t r1 = ctx->r1, r2 = ctx->r2, r3 = ctx->r3, r4 = ctx->r4, r5 = ctx->r5, r6 = ctx->r6, r7 = ctx->r7;\n" + " uint32_t r8 = ctx->r8, r9 = ctx->r9, r10 = ctx->r10, r11 = ctx->r11, r12 = ctx->r12, r13 = ctx->r13, r14 = ctx->r14, r15 = ctx->r15;\n" + " uint32_t r16 = ctx->r16, r17 = ctx->r17, r18 = ctx->r18, r19 = ctx->r19, r20 = ctx->r20, r21 = ctx->r21, r22 = ctx->r22, r23 = ctx->r23;\n" + " uint32_t r24 = ctx->r24, r25 = ctx->r25, r26 = ctx->r26, r27 = ctx->r27, r28 = ctx->r28, r29 = ctx->r29, r30 = ctx->r30, r31 = ctx->r31;\n" + " uint32_t dma_mem_address = ctx->dma_mem_address, dma_dram_address = ctx->dma_dram_address, jump_target = ctx->jump_target;\n" + " const char * debug_file = NULL; int debug_line = 0;\n" + " RSP rsp = ctx->rsp;\n", function_name); + + // Write jumps to resume targets + if (!is_initial) { + fmt::print(output_file, + " if (ctx->resume_delay) {{\n" + " switch (ctx->resume_address) {{\n"); + + for (uint32_t address : resume_targets.delay_targets) { + fmt::print(output_file, " case 0x{0:04X}: goto R_{0:04X}_delay;\n", + address); + } + + fmt::print(output_file, + " }}\n" + " }} else {{\n" + " switch (ctx->resume_address) {{\n"); + + for (uint32_t address : resume_targets.non_delay_targets) { + fmt::print(output_file, " case 0x{0:04X}: goto R_{0:04X};\n", + address); + } + + fmt::print(output_file, + " }}\n" + " }}\n" + " printf(\"Unhandled resume target 0x%04X (delay slot: %d) in microcode {}\\n\", ctx->resume_address, ctx->resume_delay);\n" + " return RspExitReason::UnhandledResumeTarget;\n", + config.output_function_name); + } + + fmt::print(output_file, " r1 = 0xFC0;\n"); + } else { + fmt::print(output_file, + "RspExitReason {}(uint8_t* rdram) {{\n" + " uint32_t r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0, r6 = 0, r7 = 0;\n" + " uint32_t r8 = 0, r9 = 0, r10 = 0, r11 = 0, r12 = 0, r13 = 0, r14 = 0, r15 = 0;\n" + " uint32_t r16 = 0, r17 = 0, r18 = 0, r19 = 0, r20 = 0, r21 = 0, r22 = 0, r23 = 0;\n" + " uint32_t r24 = 0, r25 = 0, r26 = 0, r27 = 0, r28 = 0, r29 = 0, r30 = 0, r31 = 0;\n" + " uint32_t dma_mem_address = 0, dma_dram_address = 0, jump_target = 0;\n" + " const char * debug_file = NULL; int debug_line = 0;\n" + " RSP rsp{{}};\n" + " r1 = 0xFC0;\n", function_name); + } + // Write each instruction + for (size_t instr_index = 0; instr_index < instrs.size(); instr_index++) { + process_instruction(instr_index, instrs, output_file, branch_targets, config.unsupported_instructions, resume_targets, is_permutation, false, false); + } + + // Terminate instruction code with a return to indicate that the microcode has run past its end + fmt::print(output_file, " return RspExitReason::ImemOverrun;\n"); + + // Write the section containing the indirect jump table + write_indirect_jumps(output_file, branch_targets, config.output_function_name); + + // Write routine for returning for an overlay swap + if (is_permutation) { + write_overlay_swap_return(output_file); + } + + // End the file + fmt::print(output_file, "}}\n"); +} + int main(int argc, const char** argv) { if (argc != 2) { fmt::print("Usage: {} [config file]\n", argv[0]); @@ -689,6 +1097,7 @@ int main(int argc, const char** argv) { } std::vector instr_words{}; + std::vector overlay_slots{}; instr_words.resize(config.text_size / sizeof(uint32_t)); { std::ifstream rom_file{ config.rom_file_path, std::ios_base::binary }; @@ -700,6 +1109,29 @@ int main(int argc, const char** argv) { rom_file.seekg(config.text_offset); rom_file.read(reinterpret_cast(instr_words.data()), config.text_size); + + for (const RSPRecompilerOverlaySlotConfig &slot_config : config.overlay_slots) { + OverlaySlot slot{}; + slot.offset = (slot_config.text_address - config.text_address) & rsp_mem_mask; + + for (const RSPRecompilerOverlayConfig &overlay_config : slot_config.overlays) { + Overlay overlay{}; + overlay.instr_words.resize(overlay_config.size / sizeof(uint32_t)); + + rom_file.seekg(config.text_offset + overlay_config.offset); + rom_file.read(reinterpret_cast(overlay.instr_words.data()), overlay_config.size); + + slot.overlays.push_back(overlay); + } + + overlay_slots.push_back(slot); + } + } + + // Create overlay permutations + std::vector permutations{}; + if (!overlay_slots.empty()) { + permute(instr_words, overlay_slots, permutations); } // Disable appropriate pseudo instructions @@ -717,12 +1149,27 @@ int main(int argc, const char** argv) { vram += instr_size; } - // Collect indirect jump targets (return addresses for linked jumps) - BranchTargets branch_targets = get_branch_targets(instrs); + std::vector func_permutations{}; + func_permutations.reserve(permutations.size()); + for (const Permutation& permutation : permutations) { + FunctionPermutation func = { + .permutation = std::vector(permutation.permutation) + }; - // Add any additional indirect branch targets that may not be found directly in the code (e.g. from a jump table) - for (uint32_t target : config.extra_indirect_branch_targets) { - branch_targets.indirect_targets.insert(target); + func.instrs.reserve(permutation.instr_words.size()); + uint32_t vram = config.text_address & rsp_mem_mask; + for (uint32_t instr_word : permutation.instr_words) { + const rabbitizer::InstructionRsp& instr = func.instrs.emplace_back(byteswap(instr_word), vram); + vram += instr_size; + } + + func_permutations.emplace_back(func); + } + + // Determine all possible overlay swap resume targets + ResumeTargets resume_targets{}; + for (const FunctionPermutation& permutation : func_permutations) { + get_overlay_swap_resume_targets(permutation.instrs, resume_targets); } // Open output file and write beginning @@ -730,28 +1177,20 @@ int main(int argc, const char** argv) { std::ofstream output_file(config.output_file_path); fmt::print(output_file, "#include \"librecomp/rsp.hpp\"\n" - "#include \"librecomp/rsp_vu_impl.hpp\"\n" - "RspExitReason {}(uint8_t* rdram) {{\n" - " uint32_t r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0, r6 = 0, r7 = 0;\n" - " uint32_t r8 = 0, r9 = 0, r10 = 0, r11 = 0, r12 = 0, r13 = 0, r14 = 0, r15 = 0;\n" - " uint32_t r16 = 0, r17 = 0, r18 = 0, r19 = 0, r20 = 0, r21 = 0, r22 = 0, r23 = 0;\n" - " uint32_t r24 = 0, r25 = 0, r26 = 0, r27 = 0, r28 = 0, r29 = 0, r30 = 0, r31 = 0;\n" - " uint32_t dma_dmem_address = 0, dma_dram_address = 0, jump_target = 0;\n" - " const char * debug_file = NULL; int debug_line = 0;\n" - " RSP rsp{{}};\n" - " r1 = 0xFC0;\n", config.output_function_name); - // Write each instruction - for (size_t instr_index = 0; instr_index < instrs.size(); instr_index++) { - process_instruction(instr_index, instrs, output_file, branch_targets, config.unsupported_instructions, false, false); + "#include \"librecomp/rsp_vu_impl.hpp\"\n"); + + // Write function(s) + if (overlay_slots.empty()) { + create_function(config.output_function_name, output_file, instrs, config, resume_targets, false, false); + } else { + create_overlay_swap_function(config.output_function_name, output_file, func_permutations, config); + create_function(config.output_function_name + "_initial", output_file, instrs, config, ResumeTargets{}, true, true); + + for (const auto& permutation : func_permutations) { + create_function(config.output_function_name + make_permutation_string(permutation.permutation), + output_file, permutation.instrs, config, resume_targets, true, false); + } } - // Terminate instruction code with a return to indicate that the microcode has run past its end - fmt::print(output_file, " return RspExitReason::ImemOverrun;\n"); - - // Write the section containing the indirect jump table - write_indirect_jumps(output_file, branch_targets, config.output_function_name); - - // End the file - fmt::print(output_file, "}}\n"); return 0; }