common: Rework timekeeping with native RDTSC and port to linux

2025-05-30 23:33:17 +00:00 · 2024-02-15 00:52:57 +02:00 · 2024-02-15 00:52:57 +02:00 · fe43558779
commit fe43558779
parent acfa56f6bc
37 changed files with 818 additions and 279 deletions
--- a/src/video_core/gpu_memory.cpp
+++ b/src/video_core/gpu_memory.cpp
@ -0,0 +1,188 @@
+#include "gpu_memory.h"
+#include <atomic>
+#include <xxh3.h>
+
+#include "common/singleton.h"
+
+// Single-range convenience entry point: treats (virtual_addr, size) as
+// one-element arrays and forwards to the GPUMemory singleton.
+// The `todo` argument is not forwarded; nullptr is passed in its place.
+void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/, u64 virtual_addr, u64 size,
+                           const GPUObject& info) {
+    constexpr int range_count = 1;
+    auto* const memory = Common::Singleton<GPUMemory>::Instance();
+    return memory->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size, range_count, info);
+}
+
+// Appends a new heap described by (virtual_addr, size) to the GPUMemory
+// singleton's heap list. The list is modified under the singleton's mutex.
+void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) {
+    auto* const memory = Common::Singleton<GPUMemory>::Instance();
+
+    // Build the descriptor first; it touches no shared state.
+    MemoryHeap new_heap;
+    new_heap.allocated_virtual_addr = virtual_addr;
+    new_heap.allocated_size = size;
+
+    std::scoped_lock guard{memory->m_mutex};
+    memory->m_heaps.push_back(new_heap);
+}
+
+// Returns XXH3_64bits over buf[0, size); a null buffer or a zero size yields 0.
+u64 GPU::calculate_hash(const u08* buf, u64 size) {
+    if (buf == nullptr || size == 0) {
+        return 0;
+    }
+    return XXH3_64bits(buf, size);
+}
+
+// Allocates device memory for `mem` from the first memory type that is both
+// allowed by mem->requirements.memoryTypeBits and has every flag in
+// mem->property set.
+//
+// On a successful search, mem->type, mem->offset (always 0) and mem->unique_id
+// are filled in before vkAllocateMemory is called; mem->memory receives the
+// allocation handle.
+//
+// Returns true when vkAllocateMemory reports VK_SUCCESS. Returns false when
+// the allocation fails or when no memory type satisfies the request; in the
+// latter case `mem` is left untouched.
+bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) {
+    static std::atomic_uint64_t unique_id = 0;
+
+    VkPhysicalDeviceMemoryProperties memory_properties{};
+    vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties);
+
+    u32 index = 0;
+    for (; index < memory_properties.memoryTypeCount; index++) {
+        if ((mem->requirements.memoryTypeBits & (static_cast<uint32_t>(1) << index)) != 0 &&
+            (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property) {
+            break;
+        }
+    }
+
+    // The loop ran to completion: no memory type matched. Passing
+    // memoryTypeCount as memoryTypeIndex would be an out-of-range index,
+    // so fail here instead of calling vkAllocateMemory.
+    if (index == memory_properties.memoryTypeCount) {
+        return false;
+    }
+
+    mem->type = index;
+    mem->offset = 0;
+
+    VkMemoryAllocateInfo alloc_info{};
+    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    alloc_info.pNext = nullptr;
+    alloc_info.allocationSize = mem->requirements.size;
+    alloc_info.memoryTypeIndex = index;
+
+    // Ids start at 1 and are consumed even if the allocation below fails.
+    mem->unique_id = ++unique_id;
+
+    const auto result = vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory);
+
+    return result == VK_SUCCESS;
+}
+
+// Forwards to flushAllHeaps on the GPUMemory singleton.
+void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) {
+    Common::Singleton<GPUMemory>::Instance()->flushAllHeaps(ctx);
+}
+
+// Returns the index of the first heap that contains either the first byte
+// (virtual_addr) or the last byte (virtual_addr + size - 1) of the given
+// range, or -1 when no heap does.
+// NOTE(review): a range that starts before a heap and ends after it has
+// neither endpoint inside the heap and is therefore not matched.
+int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) {
+    const auto last_addr = virtual_addr + size - 1;
+
+    for (std::size_t i = 0; i < m_heaps.size(); ++i) {
+        const auto heap_begin = m_heaps[i].allocated_virtual_addr;
+        const auto heap_end = m_heaps[i].allocated_virtual_addr + m_heaps[i].allocated_size;
+
+        const bool first_inside = virtual_addr >= heap_begin && virtual_addr < heap_end;
+        const bool last_inside = last_addr >= heap_begin && last_addr < heap_end;
+
+        if (first_inside || last_inside) {
+            return static_cast<int>(i);
+        }
+    }
+    return -1;
+}
+
+// Creates a GPU object backed by `virtual_addr_num` guest ranges and records
+// it in the heap that contains the first range.
+//
+// submit_id         stored in the object's bookkeeping entry
+// ctx               passed through to the object's create function
+// todo              currently unused
+// virtual_addr/size parallel arrays with virtual_addr_num entries
+// info              supplies the parameters, the hash policy and the
+//                   create/update functions
+//
+// Returns the pointer produced by info's create function, or nullptr when
+// virtual_addr_num is outside [1, capacity of the per-object hash array] or
+// when the first range does not belong to any registered heap.
+void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo, const u64* virtual_addr, const u64* size,
+                                      int virtual_addr_num, const GPUObject& info) {
+    ObjInfo objInfo = {};
+
+    // One hash slot exists per range; reject counts that would index past
+    // the fixed-size array (and non-positive counts, since range 0 is
+    // dereferenced unconditionally below).
+    if (virtual_addr_num < 1 || static_cast<std::size_t>(virtual_addr_num) > std::size(objInfo.hash)) {
+        return nullptr;
+    }
+
+    // Lock and search this instance: the heap index is used on this->m_heaps
+    // below, so both must refer to the same object.
+    std::scoped_lock lock{m_mutex};
+
+    const int heap_id = getHeapId(virtual_addr[0], size[0]);
+
+    if (heap_id < 0) {
+        return nullptr;
+    }
+    auto& heap = m_heaps[heap_id];
+
+    // Copy parameters from info to obj
+    for (int i = 0; i < 8; i++) {
+        objInfo.obj_params[i] = info.obj_params[i];
+    }
+
+    objInfo.gpu_object.objectType = info.objectType;
+    objInfo.gpu_object.obj = nullptr;
+
+    // Record the initial content hash of every range, or 0 when hashing is off.
+    for (int h = 0; h < virtual_addr_num; h++) {
+        if (info.check_hash) {
+            objInfo.hash[h] = GPU::calculate_hash(reinterpret_cast<const u08*>(virtual_addr[h]), size[h]);
+        } else {
+            objInfo.hash[h] = 0;
+        }
+    }
+    objInfo.submit_id = submit_id;
+    objInfo.check_hash = info.check_hash;
+
+    objInfo.gpu_object.obj = info.getCreateFunc()(ctx, objInfo.obj_params, virtual_addr, size, virtual_addr_num, &objInfo.mem);
+
+    objInfo.update_func = info.getUpdateFunc();
+    const int index = static_cast<int>(heap.objects.size());
+
+    HeapObject hobj{};
+    hobj.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, index);
+    hobj.info = objInfo;
+    hobj.free = false;
+    heap.objects.push_back(hobj);
+
+    return objInfo.gpu_object.obj;
+}
+
+// Builds a HeapBlock that records the first `virtual_addr_num` entries of the
+// virtual_addr/size arrays.
+// heap_id and obj_id are accepted but not used.
+// NOTE(review): virtual_addr_num is not checked against the capacity of the
+// block's arrays here - presumably the caller guarantees it fits; confirm.
+GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id) {
+    GPU::HeapBlock heapBlock{};
+    heapBlock.virtual_addr_num = virtual_addr_num;
+    for (int vi = 0; vi < virtual_addr_num; vi++) {
+        heapBlock.virtual_addr[vi] = virtual_addr[vi];
+        heapBlock.size[vi] = size[vi];
+    }
+    return heapBlock;
+}
+
+// Re-validates one heap object and invokes its update function when the
+// hashed content of any of its ranges has changed.
+//
+// Nothing happens unless submit_id is greater than the submit id stored in
+// the object. Otherwise each range is re-hashed (or treated as hash 0 when
+// check_hash is off), the stored hashes are refreshed, and the stored submit
+// id is replaced by submit_id unless submit_id is UINT64_MAX.
+// This function does not take m_mutex itself.
+void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) {
+    auto& heapObj = m_heaps[heap_id].objects[obj_id];
+    auto& objInfo = heapObj.info;
+    auto& block = heapObj.block;
+
+    if (!(submit_id > objInfo.submit_id)) {
+        return;
+    }
+
+    bool content_changed = false;
+    for (int i = 0; i < block.virtual_addr_num; i++) {
+        uint64_t current = 0;
+        if (objInfo.check_hash) {
+            current = GPU::calculate_hash(reinterpret_cast<const uint8_t*>(block.virtual_addr[i]), block.size[i]);
+        }
+
+        if (objInfo.hash[i] != current) {
+            content_changed = true;
+            objInfo.hash[i] = current;
+        }
+    }
+
+    if (submit_id != UINT64_MAX) {
+        objInfo.submit_id = submit_id;
+    }
+
+    if (content_changed) {
+        objInfo.update_func(ctx, objInfo.obj_params, objInfo.gpu_object.obj, block.virtual_addr, block.size, block.virtual_addr_num);
+    }
+}
+
+// Calls update() with submit id UINT64_MAX on every object that is not marked
+// free, across all heaps, while holding m_mutex.
+void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) {
+    std::scoped_lock lock{m_mutex};
+
+    for (std::size_t heap_idx = 0; heap_idx < m_heaps.size(); ++heap_idx) {
+        const auto& objects = m_heaps[heap_idx].objects;
+        for (std::size_t obj_idx = 0; obj_idx < objects.size(); ++obj_idx) {
+            if (objects[obj_idx].free) {
+                continue;
+            }
+            update(UINT64_MAX, ctx, static_cast<int>(heap_idx), static_cast<int>(obj_idx));
+        }
+    }
+}
--- a/src/video_core/gpu_memory.h
+++ b/src/video_core/gpu_memory.h
@ -0,0 +1,86 @@
+#pragma once
+
+#include "common/types.h"
+#include <array>
+#include <mutex>
+#include <vector>
+
+namespace VideoCore {
+
+// Forward declaration; the class is defined further down in this header.
+class GPUObject;
+
+// Access mode flags. Read and Write occupy separate bits and ReadWrite is
+// their bitwise OR.
+enum class MemoryMode : u32 {
+    NoAccess = 0,
+    Read = 1u << 0,
+    Write = 1u << 1,
+    ReadWrite = Read | Write,
+};
+
+// Kind of object tracked by the GPU memory manager. Invalid is the zero
+// value and is used as the default elsewhere in this header.
+enum class MemoryObjectType : u64 {
+    Invalid = 0,
+    VideoOutBuffer = 1,
+};
+
+// Type tag plus an untyped pointer to the created object.
+struct GpuMemoryObject {
+    MemoryObjectType object_type{MemoryObjectType::Invalid};
+    void* obj{nullptr};
+};
+
+// Up to three (address, size) ranges; virtual_addr_num is the number of
+// entries in use.
+// NOTE(review): gpu_memory.cpp accesses this member as `virtual_addr`, not
+// `virtual_address` - confirm which spelling is intended.
+struct HeapBlock {
+    std::array<u64, 3> virtual_address = {};
+    std::array<u64, 3> size = {};
+    u32 virtual_addr_num{0};
+};
+
+// Polymorphic description of an object to create: eight opaque parameters,
+// a hash-checking flag, a read-only flag and the object type.
+// NOTE(review): gpu_memory.cpp calls getCreateFunc()/getUpdateFunc() on this
+// type, which are not declared here - confirm where they are meant to live.
+class GPUObject {
+public:
+    GPUObject() = default;
+    virtual ~GPUObject() = default;
+
+    u64 obj_params[8]{};
+    bool check_hash{false};
+    bool isReadOnly{false};
+    MemoryObjectType objectType{MemoryObjectType::Invalid};
+};
+
+// Per-object bookkeeping: the creation parameters, the created object, one
+// hash per range, the last submit id seen and the hash-checking flag.
+struct ObjInfo {
+    std::array<u64, 8> obj_params = {};
+    GpuMemoryObject gpu_object{};
+    std::array<u64, 3> hash = {};
+    u64 submit_id{0};
+    bool check_hash{false};
+};
+
+// One slot in a heap: the covered ranges, the object bookkeeping and a flag
+// marking the slot as unused (the default).
+struct HeapObject {
+    HeapBlock block{};
+    ObjInfo info{};
+    bool free{true};
+};
+// A registered address range together with the objects created inside it.
+struct MemoryHeap {
+    u64 allocated_virtual_addr{0};
+    u64 allocated_size{0};
+    std::vector<HeapObject> objects{};
+};
+
+// Owns the list of registered heaps and the mutex that guards it.
+// NOTE(review): m_mutex and m_heaps are private, yet the free function
+// memorySetAllocArea in gpu_memory.cpp accesses them directly - confirm the
+// intended access level.
+class GPUMemory {
+public:
+    GPUMemory() = default;
+    virtual ~GPUMemory() = default;
+
+    // Index of the heap containing the first or last byte of the range, or -1.
+    int getHeapId(u64 vaddr, u64 size);
+
+    // Creates an object covering virtual_addr_num ranges and records it in a heap.
+    void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr,
+                          const u64* size, int virtual_addr_num, const GPUObject& info);
+
+    // Packs the given ranges into a HeapBlock.
+    HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id);
+
+    // Re-hashes one object and runs its update function if its content changed.
+    void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id);
+
+    // Runs update() on every non-free object of every heap.
+    void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx);
+
+private:
+    std::mutex m_mutex;
+    std::vector<MemoryHeap> m_heaps;
+};
+
+// Free-function entry points.
+// NOTE(review): gpu_memory.cpp defines functions with these names in
+// namespace GPU, while this header declares them in VideoCore - confirm
+// which namespace is intended.
+
+// Registers a new heap covering (virtual_addr, size).
+void memorySetAllocArea(u64 virtual_addr, u64 size);
+// Single-range variant of GPUMemory::memoryCreateObj.
+void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size,
+                      const GPUObject& info);
+// Hash of buf[0, size).
+u64 calculate_hash(const u08* buf, u64 size);
+// Allocates Vulkan device memory for `mem`; returns whether it succeeded.
+bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem);
+// Flushes all heaps of the GPU memory manager.
+void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx);
+
+}  // namespace VideoCore
--- a/src/video_core/tile_manager.cpp
+++ b/src/video_core/tile_manager.cpp
@ -0,0 +1,151 @@
+#include <bit>
+#include <cstring>
+#include "video_core/tile_manager.h"
+
+namespace VideoCore {
+
+// Address calculator for a tiled surface with 32 bits per element.
+// The constructor picks the bank/pipe configuration from `is_neo`;
+// GetTiledOffset() then maps an (x, y) element coordinate to its byte offset
+// inside the tiled buffer.
+class TileManager32 {
+  public:
+    u32 m_macro_tile_height = 0;
+    u32 m_bank_height = 0;
+    u32 m_num_banks = 0;
+    u32 m_num_pipes = 0;
+    u32 m_padded_width = 0;
+    u32 m_padded_height = 0;
+    u32 m_pipe_bits = 0;
+    u32 m_bank_bits = 0;
+
+    // width/height are the surface dimensions in elements.
+    TileManager32(u32 width, u32 height, bool is_neo) {
+        m_macro_tile_height = (is_neo ? 128 : 64);
+        m_bank_height = is_neo ? 2 : 1;
+        m_num_banks = is_neo ? 8 : 16;
+        m_num_pipes = is_neo ? 16 : 8;
+        m_padded_width = width;
+        // Round the height up to a whole number of macro tiles. This yields
+        // 1088/1152 for 1080 rows and 768 for 720 rows (base/neo), and also
+        // covers every other height.
+        m_padded_height = (height + m_macro_tile_height - 1) / m_macro_tile_height * m_macro_tile_height;
+        m_pipe_bits = is_neo ? 4 : 3;
+        m_bank_bits = is_neo ? 3 : 4;
+    }
+
+    // Index of the element inside its 8x8 micro tile, built by interleaving
+    // the low three bits of x and y in the order x0 x1 y0 x2 y1 y2.
+    static u32 GetElementIndex(u32 x, u32 y) {
+        u32 elem = 0;
+        elem |= ((x >> 0u) & 0x1u) << 0u;
+        elem |= ((x >> 1u) & 0x1u) << 1u;
+        elem |= ((y >> 0u) & 0x1u) << 2u;
+        elem |= ((x >> 2u) & 0x1u) << 3u;
+        elem |= ((y >> 1u) & 0x1u) << 4u;
+        elem |= ((y >> 2u) & 0x1u) << 5u;
+
+        return elem;
+    }
+
+    // Pipe index: three XOR-mixed coordinate bits, plus a fourth when is_neo.
+    static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) {
+        u32 pipe = 0;
+
+        pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
+        pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
+        pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
+        if (is_neo) {
+            pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
+        }
+
+        return pipe;
+    }
+
+    // Bank index for 8 or 16 banks; any other bank count yields 0.
+    // x is first divided by (bank_width * num_pipes) and y by bank_height,
+    // both expressed as right shifts by log2 of the divisor.
+    static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) {
+        // std::bit_width(n) is 1 + floor(log2(n)), so the shift amount is
+        // bit_width - 1; using bit_width directly shifts one bit too far.
+        const u32 x_shift_offset = Log2Floor(bank_width * num_pipes);
+        const u32 y_shift_offset = Log2Floor(bank_height);
+        const u32 xs = x >> x_shift_offset;
+        const u32 ys = y >> y_shift_offset;
+        u32 bank = 0;
+        switch (num_banks) {
+            case 8:
+                bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
+                bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
+                bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
+                break;
+            case 16:
+                bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
+                bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
+                bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
+                bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
+                break;
+            default:;
+        }
+
+        return bank;
+    }
+
+    // Byte offset of element (x, y) inside the tiled buffer.
+    u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const {
+        u64 element_index = GetElementIndex(x, y);
+
+        u32 xh = x;
+        u32 yh = y;
+        u64 pipe = GetPipeIndex(xh, yh, is_neo);
+        u64 bank = GetBankIndex(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);
+        // 8x8 elements of 32 bits each, expressed in bytes.
+        u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
+        u64 element_offset = (element_index * 32);
+        u64 tile_split_slice = 0;
+
+        // Tile splitting only applies to tiles larger than 512 bytes; with
+        // the tile size computed above this branch is not taken.
+        if (tile_bytes > 512) {
+            tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
+            element_offset %= (static_cast<u64>(512) * 8);
+            tile_bytes = 512;
+        }
+
+        u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
+        u64 macro_tiles_per_row = m_padded_width / 128;
+        u64 macro_tile_row_index = y / m_macro_tile_height;
+        u64 macro_tile_column_index = x / 128;
+        u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
+        u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
+        u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
+        u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
+        u64 slice_offset = tile_split_slice * slice_bytes;
+        u64 tile_row_index = (y / 8) % m_bank_height;
+        u64 tile_index = tile_row_index;
+        u64 tile_offset = tile_index * tile_bytes;
+
+        u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
+        bank ^= tile_split_slice_rotation;
+        bank &= (m_num_banks - 1);
+
+        // Work in bits, then split back into a byte offset and a bit remainder.
+        u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
+        u64 bit_offset = total_offset & 0x7u;
+        total_offset /= 8;
+
+        // Low 8 bits stay in place; pipe and bank bits are inserted above
+        // them and the remaining offset bits go on top.
+        u64 pipe_interleave_offset = total_offset & 0xffu;
+        u64 offset = total_offset >> 8u;
+        u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits));
+
+        return ((byte_offset << 3u) | bit_offset) / 8;
+    }
+
+  private:
+    // floor(log2(value)); returns 0 for value == 0 so the result is always a
+    // valid shift amount.
+    static u32 Log2Floor(u32 value) {
+        return value == 0 ? 0u : static_cast<u32>(std::bit_width(value) - 1);
+    }
+};
+
+// Copies a tiled image with 4 bytes per pixel from `src` into row-major
+// (linear) order at `dst`.
+//
+// dst     destination buffer; row y starts at byte offset y * width * 4
+// src     tiled source buffer, addressed through TileManager32
+// width   image width in pixels
+// height  image height in pixels
+// is_neo  selects the tiling configuration used by TileManager32
+//
+// The parameter types match the declaration in tile_manager.h
+// (void* / const void*), so callers that include the header link against
+// this definition.
+void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool is_neo) {
+    auto* const out = static_cast<u08*>(dst);
+    const auto* const in = static_cast<const u08*>(src);
+
+    const TileManager32 t{width, height, is_neo};
+    for (u32 y = 0; y < height; y++) {
+        u32 x = 0;
+        // Widen before multiplying so the row offset cannot wrap in 32 bits.
+        u64 linear_offset = static_cast<u64>(y) * width * 4;
+
+        // Pixels are copied in horizontally adjacent pairs (8 bytes) from the
+        // tiled offset of the even pixel.
+        for (; x + 1 < width; x += 2) {
+            const auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
+            std::memcpy(out + linear_offset, in + tiled_offset, sizeof(u64));
+            linear_offset += sizeof(u64);
+        }
+        // Odd width: copy the last pixel of the row on its own.
+        if (x < width) {
+            const auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
+            std::memcpy(out + linear_offset, in + tiled_offset, sizeof(u32));
+        }
+    }
+}
+
+} // namespace VideoCore
--- a/src/video_core/tile_manager.h
+++ b/src/video_core/tile_manager.h
@ -0,0 +1,9 @@
+#pragma once
+
+#include "common/types.h"
+
+namespace VideoCore {
+
+// Converts a tiled image with 4 bytes per pixel at `src` into row-major
+// order at `dst`. width and height are in pixels; `neo` selects the tiling
+// configuration.
+// NOTE(review): the definition in tile_manager.cpp takes u08* / const u08*
+// rather than void* / const void* - confirm the two signatures agree.
+void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo);
+
+} // namespace VideoCore