HLE: Convert GSP_GPU to ServiceFramework.

The only functional change is to the error handling of the GSP_GPU::ReadHWRegs function. Previously it did not return any result code (not even for success). The new result codes were found by reverse engineering the GSP module.
2017-12-16 14:35:37 -05:00 · 2017-12-16 14:35:37 -05:00 · 3652809408
commit 3652809408
parent f4b595331f
11 changed files with 655 additions and 518 deletions
--- a/src/core/hle/service/gsp/gsp.cpp
+++ b/src/core/hle/service/gsp/gsp.cpp
@ -0,0 +1,35 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+#include "core/hle/kernel/event.h"
+#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/service/gsp/gsp.h"
+
+namespace Service {
+namespace GSP {
+
+static std::weak_ptr<GSP_GPU> gsp_gpu;
+
+FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index) {
+    // Forward to the installed GSP_GPU service; it is an error to call this before one exists.
+    const std::shared_ptr<GSP_GPU> gpu = gsp_gpu.lock();
+    ASSERT(gpu != nullptr);
+
+    FrameBufferUpdate* const info = gpu->GetFrameBufferInfo(thread_id, screen_index);
+    return info;
+}
+
+void SignalInterrupt(InterruptId interrupt_id) {
+    // Forward to the installed GSP_GPU service; it is an error to call this before one exists.
+    const std::shared_ptr<GSP_GPU> gpu = gsp_gpu.lock();
+    ASSERT(gpu != nullptr);
+
+    gpu->SignalInterrupt(interrupt_id);
+}
+
+void InstallInterfaces(SM::ServiceManager& service_manager) {
+    const std::shared_ptr<GSP_GPU> service = std::make_shared<GSP_GPU>();
+    service->InstallAsService(service_manager);
+
+    // Only a weak reference is kept at file scope; it is what the free helper functions in this
+    // file lock when they forward calls to the service.
+    gsp_gpu = service;
+}
+
+} // namespace GSP
+} // namespace Service
--- a/src/core/hle/service/gsp/gsp.h
+++ b/src/core/hle/service/gsp/gsp.h
@ -0,0 +1,32 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include "common/common_types.h"
+#include "core/hle/result.h"
+#include "core/hle/service/gsp/gsp_gpu.h"
+
+namespace Service {
+namespace GSP {
+/**
+ * Retrieves the framebuffer info stored in the GSP shared memory for the
+ * specified screen index and thread id.
+ * @param thread_id GSP thread id of the process that accesses the structure that we are requesting.
+ * @param screen_index Index of the screen we are requesting (Top = 0, Bottom = 1).
+ * @returns Pointer to the FrameBufferUpdate structure describing the specified framebuffer.
+ */
+FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index);
+
+/**
+ * Signals to userland code that the specified interrupt type has occurred.
+ * @param interrupt_id ID of the interrupt that is being signalled
+ */
+void SignalInterrupt(InterruptId interrupt_id);
+
+/**
+ * Creates the GSP_GPU service and installs it through the given service manager.
+ * @param service_manager Service manager the new service is installed into
+ */
+void InstallInterfaces(SM::ServiceManager& service_manager);
+} // namespace GSP
+} // namespace Service
--- a/src/core/hle/service/gsp/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp/gsp_gpu.cpp
@ -0,0 +1,707 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+#include "common/bit_field.h"
+#include "common/microprofile.h"
+#include "common/swap.h"
+#include "core/core.h"
+#include "core/hle/ipc.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/event.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/result.h"
+#include "core/hle/service/gsp/gsp_gpu.h"
+#include "core/hw/gpu.h"
+#include "core/hw/hw.h"
+#include "core/hw/lcd.h"
+#include "core/memory.h"
+#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/gpu_debugger.h"
+
+// Main graphics debugger object - TODO: Here is probably not the best place for this
+GraphicsDebugger g_debugger;
+
+namespace Service {
+namespace GSP {
+
+// Beginning address of HW regs
+const u32 REGS_BEGIN = 0x1EB00000;
+
+/// Raw description values used to build the GX-module result codes below.
+namespace ErrCodes {
+enum {
+    // TODO(purpasmart): Check if this name fits its actual usage
+    OutofRangeOrMisalignedAddress = 513,
+    FirstInitialization = 519,
+};
+}
+
+/// Returned by RegisterInterruptRelayQueue on its first call instead of RESULT_SUCCESS.
+constexpr ResultCode RESULT_FIRST_INITIALIZATION(ErrCodes::FirstInitialization, ErrorModule::GX,
+                                                 ErrorSummary::Success, ErrorLevel::Success);
+/// Register address is not word-aligned or lies at/after offset 0x420000.
+constexpr ResultCode ERR_REGS_OUTOFRANGE_OR_MISALIGNED(ErrCodes::OutofRangeOrMisalignedAddress,
+                                                       ErrorModule::GX,
+                                                       ErrorSummary::InvalidArgument,
+                                                       ErrorLevel::Usage); // 0xE0E02A01
+/// Register access size is not a multiple of four bytes.
+constexpr ResultCode ERR_REGS_MISALIGNED(ErrorDescription::MisalignedSize, ErrorModule::GX,
+                                         ErrorSummary::InvalidArgument,
+                                         ErrorLevel::Usage); // 0xE0E02BF2
+/// Register access size exceeds the maximum accepted by the register write helpers.
+constexpr ResultCode ERR_REGS_INVALID_SIZE(ErrorDescription::InvalidSize, ErrorModule::GX,
+                                           ErrorSummary::InvalidArgument,
+                                           ErrorLevel::Usage); // 0xE0E02BEC
+
+/**
+ * Gets a pointer to a thread command buffer in GSP shared memory.
+ * Command buffers start at offset 0x800, with one CommandBuffer-sized slot per thread.
+ *
+ * @param shared_memory The GSP shared memory block; only borrowed for the duration of the call
+ * @param thread_id GSP thread id whose command buffer is requested
+ * @return Raw pointer to the first byte of that thread's command buffer
+ */
+static inline u8* GetCommandBuffer(const Kernel::SharedPtr<Kernel::SharedMemory>& shared_memory,
+                                   u32 thread_id) {
+    // Taken by const reference: the handle is only dereferenced, so there is no reason to copy
+    // the smart pointer on every call.
+    return shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
+}
+
+FrameBufferUpdate* GSP_GPU::GetFrameBufferInfo(u32 thread_id, u32 screen_index) {
+    DEBUG_ASSERT_MSG(screen_index < 2, "Invalid screen index");
+
+    // The FrameBufferUpdate table begins at offset 0x200 of the shared memory; each thread owns
+    // two consecutive entries, one per screen.
+    const u32 entry = 2 * thread_id + screen_index;
+    const u32 byte_offset = 0x200 + entry * sizeof(FrameBufferUpdate);
+    return reinterpret_cast<FrameBufferUpdate*>(shared_memory->GetPointer(byte_offset));
+}
+
+/**
+ * Gets a pointer to the interrupt relay queue for a given thread index.
+ * The queues are laid out back to back from offset 0 of the GSP shared memory.
+ *
+ * @param shared_memory The GSP shared memory block; only borrowed for the duration of the call
+ * @param thread_id GSP thread id whose queue is requested
+ * @return Pointer to that thread's InterruptRelayQueue inside the shared memory
+ */
+static inline InterruptRelayQueue* GetInterruptRelayQueue(
+    const Kernel::SharedPtr<Kernel::SharedMemory>& shared_memory, u32 thread_id) {
+    // Taken by const reference: the handle is only dereferenced, so there is no reason to copy
+    // the smart pointer on every call.
+    u8* ptr = shared_memory->GetPointer(sizeof(InterruptRelayQueue) * thread_id);
+    return reinterpret_cast<InterruptRelayQueue*>(ptr);
+}
+
+/**
+ * Writes a single u32 value to one GSP GPU hardware register.
+ * (For internal use.)
+ *
+ * @param base_address Offset of the register relative to REGS_BEGIN; must be word-aligned and
+ *                     below 0x420000 (checked by a debug assertion only)
+ * @param data Value to write
+ */
+static void WriteSingleHWReg(u32 base_address, u32 data) {
+    DEBUG_ASSERT_MSG((base_address & 3) == 0 && base_address < 0x420000,
+                     "Write address out of range or misaligned");
+
+    const u32 hw_address = base_address + REGS_BEGIN;
+    HW::Write<u32>(hw_address, data);
+}
+
+/**
+ * Writes sequential GSP GPU hardware registers using an array of source data
+ *
+ * @param base_address The address of the first register in the sequence, relative to REGS_BEGIN
+ * @param size_in_bytes The number of bytes to write (four bytes per register)
+ * @param data A vector containing the source data; must hold at least size_in_bytes bytes
+ * @return RESULT_SUCCESS if the parameters are valid, error code otherwise
+ */
+static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, const std::vector<u8>& data) {
+    // This magic number is verified to be done by the gsp module
+    const u32 max_size_in_bytes = 0x80;
+
+    if (base_address & 3 || base_address >= 0x420000) {
+        LOG_ERROR(Service_GSP,
+                  "Write address was out of range or misaligned! (address=0x%08x, size=0x%08x)",
+                  base_address, size_in_bytes);
+        return ERR_REGS_OUTOFRANGE_OR_MISALIGNED;
+    }
+
+    // The range check comes before the alignment check so that an oversized, misaligned size
+    // still reports ERR_REGS_INVALID_SIZE.
+    if (size_in_bytes > max_size_in_bytes) {
+        LOG_ERROR(Service_GSP, "Out of range size 0x%08x", size_in_bytes);
+        return ERR_REGS_INVALID_SIZE;
+    }
+
+    if (size_in_bytes & 3) {
+        LOG_ERROR(Service_GSP, "Misaligned size 0x%08x", size_in_bytes);
+        return ERR_REGS_MISALIGNED;
+    }
+
+    // The size and the buffer are supplied independently by the caller; never read past the end
+    // of the buffer that was actually provided.
+    // NOTE(review): the result code used for this case is a choice made here, not something
+    // confirmed against the real gsp module.
+    if (data.size() < size_in_bytes) {
+        LOG_ERROR(Service_GSP, "Source buffer (0x%08x bytes) is smaller than size 0x%08x",
+                  static_cast<u32>(data.size()), size_in_bytes);
+        return ERR_REGS_INVALID_SIZE;
+    }
+
+    for (u32 offset = 0; offset < size_in_bytes; offset += 4) {
+        u32 value;
+        std::memcpy(&value, &data[offset], sizeof(u32));
+        WriteSingleHWReg(base_address + offset, value);
+    }
+    return RESULT_SUCCESS;
+}
+
+/**
+ * Updates sequential GSP GPU hardware registers using parallel arrays of source data and masks.
+ * For each register, the value is updated only where the mask is high
+ *
+ * @param base_address  The address of the first register in the sequence, relative to REGS_BEGIN
+ * @param size_in_bytes The number of bytes to update (four bytes per register)
+ * @param data    A vector containing the data to write; must hold at least size_in_bytes bytes
+ * @param masks   A vector containing the masks; must hold at least size_in_bytes bytes
+ * @return RESULT_SUCCESS if the parameters are valid, error code otherwise
+ */
+static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes,
+                                      const std::vector<u8>& data, const std::vector<u8>& masks) {
+    // This magic number is verified to be done by the gsp module
+    const u32 max_size_in_bytes = 0x80;
+
+    if (base_address & 3 || base_address >= 0x420000) {
+        LOG_ERROR(Service_GSP,
+                  "Write address was out of range or misaligned! (address=0x%08x, size=0x%08x)",
+                  base_address, size_in_bytes);
+        return ERR_REGS_OUTOFRANGE_OR_MISALIGNED;
+    }
+
+    // The range check comes before the alignment check so that an oversized, misaligned size
+    // still reports ERR_REGS_INVALID_SIZE.
+    if (size_in_bytes > max_size_in_bytes) {
+        LOG_ERROR(Service_GSP, "Out of range size 0x%08x", size_in_bytes);
+        return ERR_REGS_INVALID_SIZE;
+    }
+
+    if (size_in_bytes & 3) {
+        LOG_ERROR(Service_GSP, "Misaligned size 0x%08x", size_in_bytes);
+        return ERR_REGS_MISALIGNED;
+    }
+
+    // The size and the buffers are supplied independently by the caller; never read past the
+    // end of either buffer that was actually provided.
+    // NOTE(review): the result code used for this case is a choice made here, not something
+    // confirmed against the real gsp module.
+    if (data.size() < size_in_bytes || masks.size() < size_in_bytes) {
+        LOG_ERROR(Service_GSP,
+                  "Data (0x%08x bytes) or mask (0x%08x bytes) buffer is smaller than size 0x%08x",
+                  static_cast<u32>(data.size()), static_cast<u32>(masks.size()), size_in_bytes);
+        return ERR_REGS_INVALID_SIZE;
+    }
+
+    for (u32 offset = 0; offset < size_in_bytes; offset += 4) {
+        const u32 reg_offset = base_address + offset;
+
+        u32 reg_value;
+        HW::Read<u32>(reg_value, reg_offset + REGS_BEGIN);
+
+        u32 value, mask;
+        std::memcpy(&value, &data[offset], sizeof(u32));
+        std::memcpy(&mask, &masks[offset], sizeof(u32));
+
+        // Update the current value of the register only for set mask bits
+        reg_value = (reg_value & ~mask) | (value & mask);
+
+        WriteSingleHWReg(reg_offset, reg_value);
+    }
+    return RESULT_SUCCESS;
+}
+
+/// IPC handler for command 0x1: writes a block of words to consecutive hardware registers.
+void GSP_GPU::WriteHWRegs(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x1, 2, 2);
+    const u32 reg_addr = rp.Pop<u32>();
+    const u32 size = rp.Pop<u32>();
+    const std::vector<u8> src_data = rp.PopStaticBuffer();
+
+    // The reply consists solely of the result of the register write helper.
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data));
+}
+
+/// IPC handler for command 0x2: masked write of a block of words to consecutive registers.
+void GSP_GPU::WriteHWRegsWithMask(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x2, 2, 4);
+    const u32 reg_addr = rp.Pop<u32>();
+    const u32 size = rp.Pop<u32>();
+
+    // Two static buffers follow the normal parameters: first the data, then the masks.
+    const std::vector<u8> src_data = rp.PopStaticBuffer();
+    const std::vector<u8> mask_data = rp.PopStaticBuffer();
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data));
+}
+
+/**
+ * IPC handler for command 0x4: reads a block of consecutive hardware registers.
+ *
+ * Request: register offset (relative to REGS_BEGIN) and size in bytes.
+ * Reply: a result code and, on success, a static buffer holding the register contents.
+ * The requested size is silently clamped to MaxReadSize before it is validated.
+ */
+void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x4, 2, 0);
+    u32 reg_addr = rp.Pop<u32>();
+    u32 input_size = rp.Pop<u32>();
+
+    static constexpr u32 MaxReadSize = 0x80;
+    u32 size = std::min(input_size, MaxReadSize);
+
+    if ((reg_addr % 4) != 0 || reg_addr >= 0x420000) {
+        IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+        rb.Push(ERR_REGS_OUTOFRANGE_OR_MISALIGNED);
+        LOG_ERROR(Service_GSP, "Invalid address 0x%08x", reg_addr);
+        return;
+    }
+
+    // size should be word-aligned
+    if ((size % 4) != 0) {
+        IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+        rb.Push(ERR_REGS_MISALIGNED);
+        LOG_ERROR(Service_GSP, "Invalid size 0x%08x", size);
+        return;
+    }
+
+    // Registers are accessed one 32-bit word at a time, matching the write helpers in this
+    // file; both address and size were verified to be word-aligned above. Each word is copied
+    // into the reply buffer in host byte order, mirroring how the write path decodes its input.
+    std::vector<u8> buffer(size);
+    for (u32 offset = 0; offset < size; offset += 4) {
+        u32 value = 0; // stays zero if the read does not produce a value
+        HW::Read<u32>(value, REGS_BEGIN + reg_addr + offset);
+        std::memcpy(&buffer[offset], &value, sizeof(u32));
+    }
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 2);
+    rb.Push(RESULT_SUCCESS);
+    rb.PushStaticBuffer(std::move(buffer), 0);
+}
+
+/**
+ * Programs the framebuffer configuration registers of one screen from a FrameBufferInfo.
+ *
+ * @param screen_id Index into the GPU framebuffer_config register array
+ * @param info Framebuffer description (addresses, stride, format, buffer selection)
+ * @return Always RESULT_SUCCESS
+ */
+ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
+    // Offset added to every GPU register index written below.
+    const u32 base_address = 0x400000;
+    const auto write_reg = [base_address](u32 reg_index, u32 value) {
+        WriteSingleHWReg(base_address + 4 * reg_index, value);
+    };
+
+    const PAddr phys_address_left = Memory::VirtualToPhysicalAddress(info.address_left);
+    const PAddr phys_address_right = Memory::VirtualToPhysicalAddress(info.address_right);
+
+    // active_fb selects which of the two address register pairs receives the new addresses.
+    if (info.active_fb == 0) {
+        write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].address_left1)),
+                  phys_address_left);
+        write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].address_right1)),
+                  phys_address_right);
+    } else {
+        write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].address_left2)),
+                  phys_address_left);
+        write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].address_right2)),
+                  phys_address_right);
+    }
+
+    write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].stride)),
+              info.stride);
+    write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].color_format)),
+              info.format);
+    write_reg(static_cast<u32>(GPU_REG_INDEX(framebuffer_config[screen_id].active_fb)),
+              info.shown_fb);
+
+    if (Pica::g_debug_context) {
+        Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr);
+    }
+
+    // Frame accounting is tied to swaps of screen 0 only.
+    if (screen_id == 0) {
+        MicroProfileFlip();
+        Core::System::GetInstance().perf_stats.EndGameFrame();
+    }
+
+    return RESULT_SUCCESS;
+}
+
+/// IPC handler for command 0x5: applies a FrameBufferInfo to the given screen.
+void GSP_GPU::SetBufferSwap(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x5, 8, 0);
+    const u32 screen_id = rp.Pop<u32>();
+    const auto fb_info = rp.PopRaw<FrameBufferInfo>();
+
+    // The reply consists solely of the result of the free-standing SetBufferSwap helper.
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(GSP::SetBufferSwap(screen_id, fb_info));
+}
+
+/**
+ * IPC handler for command 0x8 (stubbed): accepts an address range and a process handle and
+ * reports success without doing any cache maintenance.
+ */
+void GSP_GPU::FlushDataCache(Kernel::HLERequestContext& ctx) {
+    // The command id must match the header this handler is registered under (0x00080082);
+    // 0x9 is InvalidateDataCache.
+    IPC::RequestParser rp(ctx, 0x8, 2, 2);
+    u32 address = rp.Pop<u32>();
+    u32 size = rp.Pop<u32>();
+    auto process = rp.PopObject<Kernel::Process>();
+
+    // TODO(purpasmart96): Verify return header on HW
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+
+    LOG_DEBUG(Service_GSP, "(STUBBED) called address=0x%08X, size=0x%08X, process=%u", address,
+              size, process->process_id);
+}
+
+/// IPC handler for command 0x10 (stubbed): reads the requested mode and reports success.
+void GSP_GPU::SetAxiConfigQoSMode(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x10, 1, 0);
+    const u32 mode = rp.Pop<u32>();
+
+    // The mode is only logged; nothing is configured.
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+
+    LOG_DEBUG(Service_GSP, "(STUBBED) called mode=0x%08X", mode);
+}
+
+/**
+ * IPC handler for command 0x13: stores the caller's interrupt event and replies with the
+ * assigned GSP thread id and a handle to the GSP shared memory.
+ */
+void GSP_GPU::RegisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x13, 1, 2);
+    const u32 flags = rp.Pop<u32>();
+
+    interrupt_event = rp.PopObject<Kernel::Event>();
+    // TODO(mailwl): return right error code instead assert
+    ASSERT_MSG((interrupt_event != nullptr), "handle is not valid!");
+
+    interrupt_event->name = "GSP_GSP_GPU::interrupt_event";
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(2, 2);
+
+    // The first registration must answer with this specific code rather than 0 for the
+    // initialization to be considered successful; every later one answers with plain success.
+    const bool is_first_call = first_initialization;
+    first_initialization = false;
+    rb.Push(is_first_call ? RESULT_FIRST_INITIALIZATION : RESULT_SUCCESS);
+
+    // Hand out the current thread id, then advance it for the next registration.
+    rb.Push(thread_id);
+    rb.PushCopyObjects(shared_memory);
+    thread_id++;
+
+    interrupt_event->Signal(); // TODO(bunnei): Is this correct?
+
+    LOG_WARNING(Service_GSP, "called, flags=0x%08X", flags);
+}
+
+/// IPC handler for command 0x14 (stubbed): forgets the interrupt event and resets the thread id.
+void GSP_GPU::UnregisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x14, 0, 0);
+
+    // Return to the state in which no queue has been registered.
+    thread_id = 0;
+    interrupt_event = nullptr;
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+
+    LOG_WARNING(Service_GSP, "(STUBBED) called");
+}
+
+/**
+ * Signals that the specified interrupt type has occurred to userland code.
+ *
+ * Does nothing unless the GPU right is currently acquired and both the interrupt event and the
+ * shared memory exist. Otherwise the interrupt is appended to the relay queue of every one of
+ * the four thread slots, a pending framebuffer swap is applied for PDC0/PDC1, and the interrupt
+ * event is signalled once.
+ *
+ * @param interrupt_id ID of interrupt that is being signalled
+ * @todo This should probably take a thread_id parameter and only signal this thread?
+ * @todo This probably does not belong in the GSP module, instead move to video_core
+ */
+void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) {
+    if (!gpu_right_acquired) {
+        return;
+    }
+    if (nullptr == interrupt_event) {
+        LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
+        return;
+    }
+    if (nullptr == shared_memory) {
+        LOG_WARNING(Service_GSP, "cannot synchronize until GSP shared memory has been created!");
+        return;
+    }
+    // Note: this local thread_id shadows the member of the same name.
+    for (int thread_id = 0; thread_id < 0x4; ++thread_id) {
+        InterruptRelayQueue* interrupt_relay_queue =
+            GetInterruptRelayQueue(shared_memory, thread_id);
+        // Slot to fill = (index + number of pending interrupts) wrapped to the slot count.
+        // NOTE(review): the sum is computed in a u8, and number_interrupts is incremented
+        // without an upper bound here - confirm how a full queue is meant to be handled.
+        u8 next = interrupt_relay_queue->index;
+        next += interrupt_relay_queue->number_interrupts;
+        next = next % 0x34; // 0x34 is the number of interrupt slots
+
+        interrupt_relay_queue->number_interrupts += 1;
+
+        interrupt_relay_queue->slot[next] = interrupt_id;
+        interrupt_relay_queue->error_code = 0x0; // No error
+
+        // Update framebuffer information if requested
+        // TODO(yuriks): Confirm where this code should be called. It is definitely updated without
+        //               executing any GSP commands, only waiting on the event.
+        // PDC0 maps to screen 0 and PDC1 to screen 1; any other interrupt maps to no screen.
+        int screen_id =
+            (interrupt_id == InterruptId::PDC0) ? 0 : (interrupt_id == InterruptId::PDC1) ? 1 : -1;
+        if (screen_id != -1) {
+            FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id);
+            if (info->is_dirty) {
+                // Apply the framebuffer selected by info->index, then clear the dirty flag.
+                GSP::SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
+                info->is_dirty.Assign(false);
+            }
+        }
+    }
+    // A single signal is raised after all four queues have been updated.
+    interrupt_event->Signal();
+}
+
+MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
+
+/**
+ * Executes a single GSP command by translating it into GPU register writes (or, for DMA
+ * requests, a memory copy followed by a DMA interrupt).
+ *
+ * @param command The command to decode and execute
+ * @param thread_id GSP thread id the command came from (not used by the body)
+ */
+static void ExecuteCommand(const Command& command, u32 thread_id) {
+    // Utility function to convert register ID to address
+    static auto WriteGPURegister = [](u32 id, u32 data) {
+        GPU::Write<u32>(0x1EF00000 + 4 * id, data);
+    };
+
+    switch (command.id) {
+
+    // GX request DMA - typically used for copying memory from GSP heap to VRAM
+    case CommandId::REQUEST_DMA: {
+        MICROPROFILE_SCOPE(GPU_GSP_DMA);
+
+        // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever
+        // possible/likely
+        // The source region is flushed, the destination region is flushed and invalidated,
+        // and only then is the block copied.
+        Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address,
+                                             command.dma_request.size, Memory::FlushMode::Flush);
+        Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
+                                             command.dma_request.size,
+                                             Memory::FlushMode::FlushAndInvalidate);
+
+        // TODO(Subv): These memory accesses should not go through the application's memory mapping.
+        // They should go through the GSP module's memory mapping.
+        Memory::CopyBlock(command.dma_request.dest_address, command.dma_request.source_address,
+                          command.dma_request.size);
+        SignalInterrupt(InterruptId::DMA);
+        break;
+    }
+    // TODO: This will need some rework in the future. (why?)
+    case CommandId::SUBMIT_GPU_CMDLIST: {
+        auto& params = command.submit_gpu_cmdlist;
+
+        if (params.do_flush) {
+            // This flag flushes the command list (params.address, params.size) from the cache.
+            // Command lists are not processed by the hardware renderer, so we don't need to
+            // actually flush them in Citra.
+        }
+
+        // The address register receives the physical address shifted right by 3.
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.address)),
+                         Memory::VirtualToPhysicalAddress(params.address) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.size)),
+                         params.size);
+
+        // TODO: Not sure if we are supposed to always write this .. seems to trigger processing
+        // though
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.trigger)), 1);
+
+        // TODO(yuriks): Figure out the meaning of the `flags` field.
+
+        break;
+    }
+
+    // It's assumed that the two "blocks" behave equivalently.
+    // Presumably this is done simply to allow two memory fills to run in parallel.
+    case CommandId::SET_MEMORY_FILL: {
+        auto& params = command.memory_fill;
+
+        // A start address of zero means the corresponding fill unit is left untouched.
+        if (params.start1 != 0) {
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)),
+                             Memory::VirtualToPhysicalAddress(params.start1) >> 3);
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)),
+                             Memory::VirtualToPhysicalAddress(params.end1) >> 3);
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value_32bit)),
+                             params.value1);
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].control)),
+                             params.control1);
+        }
+
+        if (params.start2 != 0) {
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)),
+                             Memory::VirtualToPhysicalAddress(params.start2) >> 3);
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)),
+                             Memory::VirtualToPhysicalAddress(params.end2) >> 3);
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value_32bit)),
+                             params.value2);
+            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].control)),
+                             params.control2);
+        }
+        break;
+    }
+
+    case CommandId::SET_DISPLAY_TRANSFER: {
+        auto& params = command.display_transfer;
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
+                         Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
+                         Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)),
+                         params.in_buffer_size);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)),
+                         params.out_buffer_size);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)),
+                         params.flags);
+        // The trigger register is written last, after all parameters are in place.
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1);
+        break;
+    }
+
+    // Texture copies are programmed through the display transfer register block.
+    case CommandId::SET_TEXTURE_COPY: {
+        auto& params = command.texture_copy;
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
+                         Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
+                         Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
+                         params.size);
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
+                         params.in_width_gap);
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
+                         params.out_width_gap);
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), params.flags);
+
+        // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to
+        // matter.
+        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
+        break;
+    }
+
+    case CommandId::CACHE_FLUSH: {
+        // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
+        // Use command.cache_flush.regions to implement this handler
+        break;
+    }
+
+    default:
+        LOG_ERROR(Service_GSP, "unknown command 0x%08X", (int)command.id.Value());
+    }
+
+    // The debug context is notified for every command, including unknown ones.
+    if (Pica::g_debug_context)
+        Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::GSPCommandProcessed,
+                                       (void*)&command);
+}
+
+/// IPC handler for command 0xB: enables or disables the color fill on both LCDs.
+void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0xB, 1, 0);
+    const bool enable_black = rp.Pop<bool>();
+
+    // Start from an all-zero register value, so the fill color is already black; only the
+    // enable bit needs to be set.
+    LCD::Regs::ColorFill data = {0};
+    data.is_enabled.Assign(enable_black);
+
+    const auto top_lcd_reg = HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top);
+    const auto bottom_lcd_reg = HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom);
+    LCD::Write(top_lcd_reg, data.raw);
+    LCD::Write(bottom_lcd_reg, data.raw);
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+}
+
+/**
+ * IPC handler for command 0xC: executes every pending command in the command buffer of each of
+ * the four GSP thread slots, then reports success.
+ */
+void GSP_GPU::TriggerCmdReqQueue(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0xC, 0, 0);
+
+    // Iterate through each thread's command queue...
+    for (unsigned thread_id = 0; thread_id < 0x4; ++thread_id) {
+        CommandBuffer* command_buffer =
+            reinterpret_cast<CommandBuffer*>(GetCommandBuffer(shared_memory, thread_id));
+
+        // Snapshot the pending count before the loop. number_commands is decremented after
+        // every command, so comparing an incrementing index against the live field would stop
+        // after roughly half of the queued commands and leave the rest marked as pending.
+        const unsigned pending_commands = command_buffer->number_commands;
+
+        // Iterate through each command...
+        for (unsigned i = 0; i < pending_commands; ++i) {
+            g_debugger.GXCommandProcessed(reinterpret_cast<u8*>(&command_buffer->commands[i]));
+
+            // Decode and execute command
+            ExecuteCommand(command_buffer->commands[i], thread_id);
+
+            // Indicates that command has completed
+            command_buffer->number_commands.Assign(command_buffer->number_commands - 1);
+        }
+    }
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+}
+
+/**
+ * IPC handler for command 0x18: replies with the address, format and stride of the currently
+ * selected framebuffer of the top and bottom screens.
+ */
+void GSP_GPU::ImportDisplayCaptureInfo(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x18, 0, 0);
+
+    // TODO(Subv): We're always returning the framebuffer structures for thread_id = 0,
+    // because we only support a single running application at a time.
+    // This should always return the framebuffer data that is currently displayed on the screen.
+
+    const u32 thread_id = 0;
+
+    struct CaptureInfoEntry {
+        u32_le address_left;
+        u32_le address_right;
+        u32_le format;
+        u32_le stride;
+    };
+
+    // Builds one reply entry from the framebuffer that `screen->index` currently selects.
+    const auto make_entry = [](FrameBufferUpdate* screen) {
+        const auto& selected = screen->framebuffer_info[screen->index];
+
+        CaptureInfoEntry entry;
+        entry.address_left = selected.address_left;
+        entry.address_right = selected.address_right;
+        entry.format = selected.format;
+        entry.stride = selected.stride;
+        return entry;
+    };
+
+    FrameBufferUpdate* const top_screen = GetFrameBufferInfo(thread_id, 0);
+    FrameBufferUpdate* const bottom_screen = GetFrameBufferInfo(thread_id, 1);
+
+    const CaptureInfoEntry top_entry = make_entry(top_screen);
+    const CaptureInfoEntry bottom_entry = make_entry(bottom_screen);
+
+    // One result word followed by the two four-word entries.
+    IPC::RequestBuilder rb = rp.MakeBuilder(9, 0);
+    rb.Push(RESULT_SUCCESS);
+    rb.PushRaw(top_entry);
+    rb.PushRaw(bottom_entry);
+
+    LOG_WARNING(Service_GSP, "called");
+}
+
+/// IPC handler for command 0x16: marks the GPU right as acquired and reports success.
+void GSP_GPU::AcquireRight(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x16, 1, 2);
+    const u32 flag = rp.Pop<u32>();
+    const auto process = rp.PopObject<Kernel::Process>();
+
+    // From now on SignalInterrupt is allowed to deliver interrupts.
+    gpu_right_acquired = true;
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+
+    LOG_WARNING(Service_GSP, "called flag=%08X process=%u", flag, process->process_id);
+}
+
+/// IPC handler for command 0x17: marks the GPU right as released and reports success.
+void GSP_GPU::ReleaseRight(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x17, 0, 0);
+
+    // While the right is not held, SignalInterrupt returns without delivering anything.
+    gpu_right_acquired = false;
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+
+    LOG_WARNING(Service_GSP, "called");
+}
+
+/**
+ * IPC handler for command 0x1F (stubbed): accepts an address range and a process handle and
+ * reports success without doing any cache maintenance.
+ */
+void GSP_GPU::StoreDataCache(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp(ctx, 0x1F, 2, 2);
+    const u32 address = rp.Pop<u32>();
+    const u32 size = rp.Pop<u32>();
+    const auto process = rp.PopObject<Kernel::Process>();
+
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+    rb.Push(RESULT_SUCCESS);
+
+    LOG_DEBUG(Service_GSP, "(STUBBED) called address=0x%08X, size=0x%08X, process=%u", address,
+              size, process->process_id);
+}
+
+/**
+ * Constructs the "gsp::Gpu" service: registers the IPC command table, creates the 0x1000-byte
+ * GSP shared memory block and resets the service state.
+ */
+// NOTE(review): the second ServiceFramework argument (2) is presumably the session limit -
+// confirm against ServiceFramework.
+GSP_GPU::GSP_GPU() : ServiceFramework("gsp::Gpu", 2) {
+    // Each entry is {IPC command header, handler, name}; a nullptr handler marks a command
+    // that is known by name but not implemented.
+    static const FunctionInfo functions[] = {
+        {0x00010082, &GSP_GPU::WriteHWRegs, "WriteHWRegs"},
+        {0x00020084, &GSP_GPU::WriteHWRegsWithMask, "WriteHWRegsWithMask"},
+        {0x00030082, nullptr, "WriteHWRegRepeat"},
+        {0x00040080, &GSP_GPU::ReadHWRegs, "ReadHWRegs"},
+        {0x00050200, &GSP_GPU::SetBufferSwap, "SetBufferSwap"},
+        {0x00060082, nullptr, "SetCommandList"},
+        {0x000700C2, nullptr, "RequestDma"},
+        {0x00080082, &GSP_GPU::FlushDataCache, "FlushDataCache"},
+        {0x00090082, nullptr, "InvalidateDataCache"},
+        {0x000A0044, nullptr, "RegisterInterruptEvents"},
+        {0x000B0040, &GSP_GPU::SetLcdForceBlack, "SetLcdForceBlack"},
+        {0x000C0000, &GSP_GPU::TriggerCmdReqQueue, "TriggerCmdReqQueue"},
+        {0x000D0140, nullptr, "SetDisplayTransfer"},
+        {0x000E0180, nullptr, "SetTextureCopy"},
+        {0x000F0200, nullptr, "SetMemoryFill"},
+        {0x00100040, &GSP_GPU::SetAxiConfigQoSMode, "SetAxiConfigQoSMode"},
+        {0x00110040, nullptr, "SetPerfLogMode"},
+        {0x00120000, nullptr, "GetPerfLog"},
+        {0x00130042, &GSP_GPU::RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"},
+        {0x00140000, &GSP_GPU::UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"},
+        {0x00150002, nullptr, "TryAcquireRight"},
+        {0x00160042, &GSP_GPU::AcquireRight, "AcquireRight"},
+        {0x00170000, &GSP_GPU::ReleaseRight, "ReleaseRight"},
+        {0x00180000, &GSP_GPU::ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"},
+        {0x00190000, nullptr, "SaveVramSysArea"},
+        {0x001A0000, nullptr, "RestoreVramSysArea"},
+        {0x001B0000, nullptr, "ResetGpuCore"},
+        {0x001C0040, nullptr, "SetLedForceOff"},
+        {0x001D0040, nullptr, "SetTestCommand"},
+        {0x001E0080, nullptr, "SetInternalPriorities"},
+        {0x001F0082, &GSP_GPU::StoreDataCache, "StoreDataCache"},
+    };
+    RegisterHandlers(functions);
+
+    // No interrupt event exists until RegisterInterruptRelayQueue supplies one.
+    interrupt_event = nullptr;
+
+    // Shared memory holding the interrupt relay queues, framebuffer info and command buffers.
+    using Kernel::MemoryPermission;
+    shared_memory = Kernel::SharedMemory::Create(nullptr, 0x1000, MemoryPermission::ReadWrite,
+                                                 MemoryPermission::ReadWrite, 0,
+                                                 Kernel::MemoryRegion::BASE, "GSP:SharedMemory");
+
+    thread_id = 0;
+    gpu_right_acquired = false;
+    first_initialization = true;
+};
+
+} // namespace GSP
+} // namespace Service
--- a/src/core/hle/service/gsp/gsp_gpu.h
+++ b/src/core/hle/service/gsp/gsp_gpu.h
@ -0,0 +1,368 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/hle_ipc.h"
+#include "core/hle/result.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class Event;        // forward declaration; held by GSP_GPU::interrupt_event
+class SharedMemory; // forward declaration; held by GSP_GPU::shared_memory
+} // namespace Kernel
+
+namespace Service {
+namespace GSP {
+
+/**
+ * GSP interrupt ID.
+ * One-byte values; InterruptRelayQueue::slot stores an array of them.
+ */
+enum class InterruptId : u8 {
+    PSC0 = 0x00,
+    PSC1 = 0x01,
+    PDC0 = 0x02, // Seems to be called every vertical screen line
+    PDC1 = 0x03, // Seems to be called every frame
+    PPF = 0x04,
+    P3D = 0x05,
+    DMA = 0x06,
+};
+
+/// GSP command ID (held in the 8-bit Command::id field)
+enum class CommandId : u32 {
+    REQUEST_DMA = 0x00, // presumably parameterized by Command::dma_request (matched by name)
+    /// Submits a commandlist for execution by the GPU.
+    SUBMIT_GPU_CMDLIST = 0x01,
+
+    /// Fills a given memory range with a particular value.
+    SET_MEMORY_FILL = 0x02,
+
+    /// Copies an image and optionally performs color-conversion or scaling.
+    /// This is highly similar to the GameCube's EFB copy feature.
+    SET_DISPLAY_TRANSFER = 0x03,
+
+    /// Conceptually similar to SET_DISPLAY_TRANSFER and presumably uses the same hardware path.
+    SET_TEXTURE_COPY = 0x04,
+    /// Flushes up to 3 cache regions in a single command.
+    CACHE_FLUSH = 0x05,
+};
+
+/**
+ * GSP thread interrupt relay queue.
+ * 0x40 bytes in total: a 0xC-byte header followed by 0x34 one-byte interrupt ID slots.
+ */
+struct InterruptRelayQueue {
+    // Index of last interrupt in the queue
+    u8 index;
+    // Number of interrupts remaining to be processed by the userland code
+    u8 number_interrupts;
+    // Error code - zero on success, otherwise an error has occurred
+    u8 error_code;
+    u8 padding1;
+
+    u32 missed_PDC0; // NOTE(review): presumably a count of missed PDC0 interrupts - confirm
+    u32 missed_PDC1; // NOTE(review): presumably a count of missed PDC1 interrupts - confirm
+
+    InterruptId slot[0x34]; ///< Interrupt ID slots
+};
+static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size");
+
+struct FrameBufferInfo { // describes one framebuffer; seven u32 words (0x1C bytes)
+    u32 active_fb; // 0 = first, 1 = second
+    u32 address_left;
+    u32 address_right;
+    u32 stride;   // maps to 0x1EF00X90 ?
+    u32 format;   // maps to 0x1EF00X70 ?
+    u32 shown_fb; // maps to 0x1EF00X78 ?
+    u32 unknown;
+};
+static_assert(sizeof(FrameBufferInfo) == 0x1c, "Struct has incorrect size");
+
+struct FrameBufferUpdate { // index and is_dirty are separate u8 bytes; each uses only bit 0
+    BitField<0, 1, u8> index;    // Index used for GSP::SetBufferSwap
+    BitField<0, 1, u8> is_dirty; // true if GSP should update GPU framebuffer registers
+    u16 pad1;
+
+    FrameBufferInfo framebuffer_info[2]; // offset 0x20 of entry [1] is checked further down
+
+    u32 pad2;
+};
+static_assert(sizeof(FrameBufferUpdate) == 0x40, "Struct has incorrect size");
+// TODO: Not sure if this padding is correct.
+// Chances are the second block is stored at offset 0x24 rather than 0x20.
+#ifndef _MSC_VER // the offsetof check below is skipped when building with MSVC
+static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20,
+              "FrameBufferInfo element has incorrect alignment");
+#endif
+
+/**
+ * GSP command.
+ * 0x20 bytes: a 32-bit word whose low 8 bits hold the CommandId, followed by a 0x1C-byte
+ * parameter area that is viewed through one of the union members below. The member names
+ * mirror the CommandId values (presumably one parameter layout per command - confirm).
+ */
+struct Command {
+    BitField<0, 8, CommandId> id;
+
+    union {
+        struct {
+            u32 source_address;
+            u32 dest_address;
+            u32 size;
+        } dma_request;
+
+        struct {
+            u32 address;
+            u32 size;
+            u32 flags;
+            u32 unused[3];
+            u32 do_flush;
+        } submit_gpu_cmdlist;
+
+        struct {
+            u32 start1;
+            u32 value1;
+            u32 end1;
+
+            u32 start2;
+            u32 value2;
+            u32 end2;
+
+            u16 control1;
+            u16 control2;
+        } memory_fill;
+
+        struct {
+            u32 in_buffer_address;
+            u32 out_buffer_address;
+            u32 in_buffer_size;
+            u32 out_buffer_size;
+            u32 flags;
+        } display_transfer;
+
+        struct {
+            u32 in_buffer_address;
+            u32 out_buffer_address;
+            u32 size;
+            u32 in_width_gap;
+            u32 out_width_gap;
+            u32 flags;
+        } texture_copy;
+
+        struct {
+            struct {
+                u32 address;
+                u32 size;
+            } regions[3]; // three address/size pairs (CACHE_FLUSH flushes up to 3 regions)
+        } cache_flush;
+
+        u8 raw_data[0x1C]; // raw byte view of the whole parameter area
+    };
+};
+static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size");
+
+/**
+ * GSP shared memory GX command buffer header.
+ * 0x200 bytes: a 0x20-byte header (the index/count word plus 7 unknown words) followed by
+ * 15 Command slots of 0x20 bytes each.
+ */
+struct CommandBuffer {
+    union {
+        u32 hex; // raw view of the word that the two bit fields below are packed into
+
+        // Current command index. This index is updated by GSP module after loading the command
+        // data, right before the command is processed. When this index is updated by GSP module,
+        // the total commands field is decreased by one as well.
+        BitField<0, 8, u32> index;
+
+        // Total commands to process; must not be 0 when the GSP module handles commands and
+        // must be <=15 when writing a command to shared memory. The application increments
+        // this when writing a command to shared memory; after increasing it,
+        // TriggerCmdReqQueue is only used if this field has the value 1.
+        BitField<8, 8, u32> number_commands;
+    };
+
+    u32 unk[7]; // unknown header words
+
+    Command commands[0xF]; // 15 command slots
+};
+static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size");
+
+class GSP_GPU final : public ServiceFramework<GSP_GPU> { // IPC command handlers are private
+public:
+    GSP_GPU();
+    ~GSP_GPU() = default;
+
+    /**
+     * Signals that the specified interrupt type has occurred to userland code
+     * @param interrupt_id ID of interrupt that is being signalled
+     */
+    void SignalInterrupt(InterruptId interrupt_id);
+
+    /**
+     * Retrieves the framebuffer info stored in the GSP shared memory for the
+     * specified screen index and thread id.
+     * @param thread_id GSP thread id of the process that accesses the structure that we are
+     * requesting.
+     * @param screen_index Index of the screen we are requesting (Top = 0, Bottom = 1).
+     * @returns Pointer to the FrameBufferUpdate structure for the specified framebuffer.
+     */
+    FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index);
+
+private:
+    /**
+     * GSP_GPU::WriteHWRegs service function
+     *
+     * Writes sequential GSP GPU hardware registers
+     *
+     *  Inputs:
+     *      1 : address of first GPU register
+     *      2 : number of registers to write sequentially
+     *      4 : pointer to source data array
+     */
+    void WriteHWRegs(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::WriteHWRegsWithMask service function
+     *
+     * Updates sequential GSP GPU hardware registers using masks
+     *
+     *  Inputs:
+     *      1 : address of first GPU register
+     *      2 : number of registers to update sequentially
+     *      4 : pointer to source data array
+     *      6 : pointer to mask array
+     */
+    void WriteHWRegsWithMask(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::ReadHWRegs service function
+     *
+     * Reads GSP GPU hardware registers. The parameter layout is not documented in this header.
+     */
+    void ReadHWRegs(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::SetBufferSwap service function
+     *
+     * Updates GPU display framebuffer configuration using the specified parameters.
+     *
+     *  Inputs:
+     *      1 : Screen ID (0 = top screen, 1 = bottom screen)
+     *      2-8 : FrameBufferInfo structure (0x1C bytes, i.e. 7 words)
+     *  Outputs:
+     *      1: Result code
+     */
+    void SetBufferSwap(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::FlushDataCache service function
+     *
+     * This function is a no-op; we aren't emulating the CPU cache any time soon.
+     *
+     *  Inputs:
+     *      1 : Address
+     *      2 : Size
+     *      3 : Value 0, some descriptor for the KProcess Handle
+     *      4 : KProcess handle
+     *  Outputs:
+     *      1 : Result of function, 0 on success, otherwise error code
+     */
+    void FlushDataCache(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::SetLcdForceBlack service function
+     *
+     * Enable or disable REG_LCDCOLORFILL with the color black.
+     *
+     *  Inputs:
+     *      1: Black color fill flag (0 = don't fill, !0 = fill)
+     *  Outputs:
+     *      1: Result code
+     */
+    void SetLcdForceBlack(Kernel::HLERequestContext& ctx);
+
+    /// This triggers handling of the GX command written to the command buffer in shared memory.
+    void TriggerCmdReqQueue(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::SetAxiConfigQoSMode service function
+     *  Inputs:
+     *      1 : Mode, unused in emulator
+     *  Outputs:
+     *      1 : Result of function, 0 on success, otherwise error code
+     */
+    void SetAxiConfigQoSMode(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::RegisterInterruptRelayQueue service function
+     *  Inputs:
+     *      1 : "Flags" field, purpose is unknown
+     *      3 : Handle to GSP synchronization event
+     *  Outputs:
+     *      1 : Result of function, 0x2A07 on success, otherwise error code
+     *      2 : Thread index into GSP command buffer
+     *      4 : Handle to GSP shared memory
+     *
+     * NOTE(review): the first_initialization member suggests the result may differ between the
+     * first and later calls - confirm against the implementation.
+     */
+    void RegisterInterruptRelayQueue(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::UnregisterInterruptRelayQueue service function
+     *  Outputs:
+     *      1 : Result of function, 0 on success, otherwise error code
+     */
+    void UnregisterInterruptRelayQueue(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::AcquireRight service function
+     *  Outputs:
+     *      1: Result code
+     */
+    void AcquireRight(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::ReleaseRight service function
+     *  Outputs:
+     *      1: Result code
+     */
+    void ReleaseRight(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::ImportDisplayCaptureInfo service function
+     *
+     * Returns information about the current framebuffer state
+     *
+     *  Inputs:
+     *      0: Header 0x00180000
+     *  Outputs:
+     *      0: Header Code[0x00180240]
+     *      1: Result code
+     *      2: Left framebuffer virtual address for the main screen
+     *      3: Right framebuffer virtual address for the main screen
+     *      4: Main screen framebuffer format
+     *      5: Main screen framebuffer width
+     *      6: Left framebuffer virtual address for the bottom screen
+     *      7: Right framebuffer virtual address for the bottom screen
+     *      8: Bottom screen framebuffer format
+     *      9: Bottom screen framebuffer width
+     */
+    void ImportDisplayCaptureInfo(Kernel::HLERequestContext& ctx);
+
+    /**
+     * GSP_GPU::StoreDataCache service function
+     *
+     * This function is a no-op; we aren't emulating the CPU cache any time soon.
+     *
+     *  Inputs:
+     *      0 : Header code [0x001F0082]
+     *      1 : Address
+     *      2 : Size
+     *      3 : Value 0, some descriptor for the KProcess Handle
+     *      4 : KProcess handle
+     *  Outputs:
+     *      1 : Result of function, 0 on success, otherwise error code
+     */
+    void StoreDataCache(Kernel::HLERequestContext& ctx);
+
+    /// Event triggered when GSP interrupt has been signalled
+    Kernel::SharedPtr<Kernel::Event> interrupt_event;
+    /// GSP shared memory (created in the constructor as a 0x1000-byte read/write block)
+    Kernel::SharedPtr<Kernel::SharedMemory> shared_memory;
+    /// Thread index into interrupt relay queue
+    u32 thread_id = 0;
+
+    bool gpu_right_acquired = false; // presumably tracks AcquireRight/ReleaseRight - TODO confirm
+    bool first_initialization = true; // starts true; NOTE(review): confirm where it is cleared
+};
+
+ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info); // direct (non-IPC) variant
+
+} // namespace GSP
+} // namespace Service