common: Rework timekeeping with native RDTSC and port to linux

This commit is contained in:
GPUCode 2024-02-15 00:52:57 +02:00
parent acfa56f6bc
commit fe43558779
37 changed files with 818 additions and 279 deletions

View file

@ -0,0 +1,188 @@
#include "gpu_memory.h"
#include <atomic>
#include <xxh3.h>
#include "common/singleton.h"
// Single-range convenience wrapper: forwards one (address, size) pair to the GPUMemory
// singleton as arrays of length one. The `todo` argument is not forwarded; nullptr is passed.
void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/, u64 virtual_addr, u64 size,
                           const GPUObject& info) {
    constexpr int range_count = 1;
    return Common::Singleton<GPUMemory>::Instance()->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size, range_count, info);
}
// Appends a heap record with the given base address and size to the GPUMemory singleton.
// The heap list is modified while holding the manager's mutex.
void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) {
    MemoryHeap new_heap;
    new_heap.allocated_virtual_addr = virtual_addr;
    new_heap.allocated_size = size;

    auto* const manager = Common::Singleton<GPUMemory>::Instance();
    std::scoped_lock guard{manager->m_mutex};
    manager->m_heaps.push_back(new_heap);
}
// Returns the XXH3 64-bit hash of `size` bytes at `buf`, or 0 when the buffer is null or empty.
u64 GPU::calculate_hash(const u08* buf, u64 size) {
    if (buf == nullptr || size == 0) {
        return 0;
    }
    return XXH3_64bits(buf, size);
}
// Allocates device memory for `mem` from the first memory type that is allowed by
// mem->requirements.memoryTypeBits and provides every property flag in mem->property.
//
// On entry mem->requirements and mem->property must be filled in. The function writes
// mem->type, mem->offset (always 0), mem->unique_id (process-wide increasing counter) and,
// on success, mem->memory.
//
// Returns true when vkAllocateMemory succeeds; false when it fails or when no memory type
// satisfies the request.
bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) {
    static std::atomic_uint64_t unique_id = 0;

    VkPhysicalDeviceMemoryProperties memory_properties{};
    vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties);

    // Linear search for the first compatible memory type.
    u32 index = 0;
    for (; index < memory_properties.memoryTypeCount; index++) {
        const bool type_allowed = (mem->requirements.memoryTypeBits & (static_cast<uint32_t>(1) << index)) != 0;
        const bool has_properties = (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property;
        if (type_allowed && has_properties) {
            break;
        }
    }

    mem->type = index;
    mem->offset = 0;
    mem->unique_id = ++unique_id;

    // The loop ran to completion without a match: `index` is not a valid memory type, and
    // passing it to vkAllocateMemory would violate the API's valid-usage rules.
    if (index >= memory_properties.memoryTypeCount) {
        return false;
    }

    VkMemoryAllocateInfo alloc_info{};
    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    alloc_info.pNext = nullptr;
    alloc_info.allocationSize = mem->requirements.size;
    alloc_info.memoryTypeIndex = index;

    return vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory) == VK_SUCCESS;
}
// Asks the GPUMemory singleton to re-check every live object in every heap.
void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) {
    Common::Singleton<GPUMemory>::Instance()->flushAllHeaps(ctx);
}
// Finds the heap that the range [virtual_addr, virtual_addr + size) touches.
// A heap matches when it contains either the first or the last byte of the range.
// Returns the index of the first matching heap in m_heaps, or -1 when none matches.
int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) {
    const u64 last_addr = virtual_addr + size - 1;
    const int heap_count = static_cast<int>(m_heaps.size());

    for (int id = 0; id < heap_count; ++id) {
        const auto& candidate = m_heaps[id];
        const auto heap_begin = candidate.allocated_virtual_addr;
        const auto heap_end = candidate.allocated_virtual_addr + candidate.allocated_size;

        const bool first_inside = virtual_addr >= heap_begin && virtual_addr < heap_end;
        const bool last_inside = last_addr >= heap_begin && last_addr < heap_end;
        if (first_inside || last_inside) {
            return id;
        }
    }
    return -1;
}
// Creates a GPU object backed by `virtual_addr_num` guest address ranges and records it in
// the heap that contains the first range.
//
// submit_id        - stored with the object; update() compares later submit ids against it.
// ctx              - graphics context handed to the object's create function.
// todo             - currently unused.
// virtual_addr     - array of `virtual_addr_num` range start addresses.
// size             - array of `virtual_addr_num` range sizes.
// virtual_addr_num - number of ranges; must be between 1 and the number of hash slots in ObjInfo.
// info             - object description: parameters, type, hashing flag and create/update callbacks.
//
// Returns the pointer produced by the create function, or nullptr when the arguments are
// invalid or no heap contains the first range.
void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo, const u64* virtual_addr, const u64* size,
                                      int virtual_addr_num, const GPUObject& info) {
    ObjInfo objInfo = {};

    // One hash slot exists per range; reject counts that would index past the fixed-size
    // per-range storage (or that leave virtual_addr[0] / size[0] unreadable).
    constexpr int max_ranges = static_cast<int>(sizeof(objInfo.hash) / sizeof(objInfo.hash[0]));
    if (virtual_addr == nullptr || size == nullptr || virtual_addr_num < 1 || virtual_addr_num > max_ranges) {
        return nullptr;
    }

    // Guard this instance's heap list with this instance's mutex, as flushAllHeaps() does.
    std::scoped_lock lock{m_mutex};

    const int heap_id = getHeapId(virtual_addr[0], size[0]);
    if (heap_id < 0) {
        return nullptr;
    }
    auto& heap = m_heaps[heap_id];

    // Copy parameters from info to obj
    for (int i = 0; i < 8; i++) {
        objInfo.obj_params[i] = info.obj_params[i];
    }
    objInfo.gpu_object.objectType = info.objectType;
    objInfo.gpu_object.obj = nullptr;

    // Snapshot the content hash of every range so update() can detect later changes.
    for (int h = 0; h < virtual_addr_num; h++) {
        objInfo.hash[h] = info.check_hash ? GPU::calculate_hash(reinterpret_cast<const u08*>(virtual_addr[h]), size[h]) : 0;
    }
    objInfo.submit_id = submit_id;
    objInfo.check_hash = info.check_hash;
    objInfo.gpu_object.obj = info.getCreateFunc()(ctx, objInfo.obj_params, virtual_addr, size, virtual_addr_num, &objInfo.mem);
    objInfo.update_func = info.getUpdateFunc();

    // The new object is appended, so its index is the current object count.
    const int index = static_cast<int>(heap.objects.size());
    HeapObject hobj{};
    hobj.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, index);
    hobj.info = objInfo;
    hobj.free = false;
    heap.objects.push_back(hobj);

    return objInfo.gpu_object.obj;
}
// Builds a HeapBlock describing `virtual_addr_num` address ranges by copying the given
// start addresses and sizes. `heap_id` and `obj_id` are accepted but currently unused.
GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id) {
    GPU::HeapBlock heapBlock{};
    heapBlock.virtual_addr_num = virtual_addr_num;
    for (int vi = 0; vi < virtual_addr_num; vi++) {
        heapBlock.virtual_addr[vi] = virtual_addr[vi];
        heapBlock.size[vi] = size[vi];
    }
    return heapBlock;
}
// Re-checks one heap object and invokes its update callback when its backing memory changed.
// Nothing happens unless `submit_id` is newer than the id stored with the object. Otherwise
// each range is re-hashed (hash is 0 when hashing is disabled for the object), changed hashes
// are stored, and the stored submit id is advanced unless `submit_id` is UINT64_MAX.
void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) {
    auto& entry = m_heaps[heap_id].objects[obj_id];
    auto& state = entry.info;
    auto& block = entry.block;

    if (!(submit_id > state.submit_id)) {
        return;
    }

    bool changed = false;
    for (int range = 0; range < block.virtual_addr_num; range++) {
        uint64_t fresh = 0;
        if (state.check_hash) {
            fresh = GPU::calculate_hash(reinterpret_cast<const uint8_t*>(block.virtual_addr[range]), block.size[range]);
        }
        if (state.hash[range] != fresh) {
            state.hash[range] = fresh;
            changed = true;
        }
    }

    if (submit_id != UINT64_MAX) {
        state.submit_id = submit_id;
    }

    if (changed) {
        state.update_func(ctx, state.obj_params, state.gpu_object.obj, block.virtual_addr, block.size, block.virtual_addr_num);
    }
}
// Runs update() with submit id UINT64_MAX on every object that is not marked free, across
// all heaps, while holding the manager mutex.
void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) {
    std::scoped_lock lock{m_mutex};

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int heap_id = 0; heap_id < heap_count; ++heap_id) {
        const int object_count = static_cast<int>(m_heaps[heap_id].objects.size());
        for (int obj_id = 0; obj_id < object_count; ++obj_id) {
            if (m_heaps[heap_id].objects[obj_id].free) {
                continue;
            }
            update(UINT64_MAX, ctx, heap_id, obj_id);
        }
    }
}

View file

@ -0,0 +1,86 @@
#pragma once
#include "common/types.h"
#include <mutex>
#include <vector>
namespace VideoCore {
// Forward declaration; the class itself is defined further down in this header.
class GPUObject;
// Access mode for a memory range, encoded as two flag bits (bit 0 = read, bit 1 = write).
enum class MemoryMode : u32 {
    NoAccess = 0,
    Read = 1u << 0,
    Write = 1u << 1,
    ReadWrite = Read | Write,
};
// Kind of object tracked by the GPU memory manager. Invalid is the default value.
enum class MemoryObjectType : u64 {
    Invalid = 0,
    VideoOutBuffer = 1,
};
// Type tag plus an untyped pointer to the created object.
struct GpuMemoryObject {
    MemoryObjectType object_type{MemoryObjectType::Invalid};
    void* obj{nullptr};
};
// Up to three (address, size) pairs describing the memory behind one object.
// virtual_addr_num is the number of pairs in use.
struct HeapBlock {
    std::array<u64, 3> virtual_address = {};
    std::array<u64, 3> size = {};
    u32 virtual_addr_num{0};
};
// Polymorphic description of an object to be created in GPU memory: eight opaque
// parameters, a flag requesting content hashing, a read-only flag and the object type.
class GPUObject {
public:
    GPUObject() = default;
    virtual ~GPUObject() = default;

    u64 obj_params[8]{};
    bool check_hash{false};
    bool isReadOnly{false};
    MemoryObjectType objectType{MemoryObjectType::Invalid};
};
// Per-object bookkeeping: the creation parameters, the created object, one hash per address
// range, the last submit id seen and whether hashing is enabled.
struct ObjInfo {
    std::array<u64, 8> obj_params = {};
    GpuMemoryObject gpu_object;
    std::array<u64, 3> hash = {};
    u64 submit_id{0};
    bool check_hash{false};
};
// One slot in a heap's object list. `free` defaults to true, i.e. the slot is unused.
struct HeapObject {
    HeapBlock block;
    ObjInfo info;
    bool free{true};
};
// A registered address range (base address and size) together with the objects created in it.
struct MemoryHeap {
    u64 allocated_virtual_addr{0};
    u64 allocated_size{0};
    std::vector<HeapObject> objects;
};
// Owns the list of registered memory heaps and the objects created inside them.
// m_mutex is the lock intended to guard m_heaps.
class GPUMemory {
public:
    // Defaulted, matching the style GPUObject already uses in this header.
    GPUMemory() = default;
    virtual ~GPUMemory() = default;

    // Returns the index of the heap associated with the given range, or a negative value.
    int getHeapId(u64 vaddr, u64 size);

    // Creates an object backed by `virtual_addr_num` address ranges described by the
    // parallel arrays `virtual_addr` and `size`.
    void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr,
                          const u64* size, int virtual_addr_num, const GPUObject& info);

    // Builds the HeapBlock describing the given address ranges.
    HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id);

    // Re-checks a single object identified by heap index and object index.
    void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id);

    // Re-checks every object in every heap.
    void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx);

private:
    std::mutex m_mutex;
    std::vector<MemoryHeap> m_heaps;
};
// Free-function entry points of the GPU memory module.
// NOTE(review): the definitions visible alongside this header are written in namespace GPU,
// not VideoCore — confirm the namespaces are meant to match.

// Registers the range [virtual_addr, virtual_addr + size) with the memory manager.
void memorySetAllocArea(u64 virtual_addr, u64 size);
// Single-range variant of GPUMemory::memoryCreateObj.
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size,
const GPUObject& info);
// Hashes `size` bytes starting at `buf`.
u64 calculate_hash(const u08* buf, u64 size);
// Allocates Vulkan device memory for `mem`; returns whether the allocation succeeded.
bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem);
// Presumably refreshes all tracked objects — TODO confirm against the implementation.
void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx);
} // namespace VideoCore

View file

@ -0,0 +1,151 @@
#include <bit>
#include <cstring>
#include "video_core/tile_manager.h"
namespace VideoCore {
// Address calculator for a tiled surface with 32-bit elements.
//
// The constructor selects one of two fixed parameter sets depending on `is_neo`;
// GetTiledOffset() then maps a pixel coordinate to the byte offset of that pixel inside the
// tiled buffer, combining an in-tile element index, a pipe index, a bank index and the
// macro-tile position.
class TileManager32 {
public:
    u32 m_macro_tile_height = 0;
    u32 m_bank_height = 0;
    u32 m_num_banks = 0;
    u32 m_num_pipes = 0;
    u32 m_padded_width = 0;
    u32 m_padded_height = 0;
    u32 m_pipe_bits = 0;
    u32 m_bank_bits = 0;

    // Only heights 1080 and 720 get a padded height; for any other height m_padded_height
    // stays 0. It is only consumed by the tile-split path of GetTiledOffset(), which is not
    // taken for 32-bit elements.
    TileManager32(u32 width, u32 height, bool is_neo) {
        m_macro_tile_height = (is_neo ? 128 : 64);
        m_bank_height = is_neo ? 2 : 1;
        m_num_banks = is_neo ? 8 : 16;
        m_num_pipes = is_neo ? 16 : 8;
        m_padded_width = width;
        if (height == 1080) {
            m_padded_height = is_neo ? 1152 : 1088;
        }
        if (height == 720) {
            m_padded_height = 768;
        }
        m_pipe_bits = is_neo ? 4 : 3;
        m_bank_bits = is_neo ? 3 : 4;
    }

    // Index of a pixel inside its 8x8 tile: the low three bits of x and y interleaved as
    // x0, x1, y0, x2, y1, y2 (least significant first).
    static u32 GetElementIndex(u32 x, u32 y) {
        u32 elem = 0;
        elem |= ((x >> 0u) & 0x1u) << 0u;
        elem |= ((x >> 1u) & 0x1u) << 1u;
        elem |= ((y >> 0u) & 0x1u) << 2u;
        elem |= ((x >> 2u) & 0x1u) << 3u;
        elem |= ((y >> 1u) & 0x1u) << 4u;
        elem |= ((y >> 2u) & 0x1u) << 5u;
        return elem;
    }

    // Pipe index built from XORs of x/y bits 3..6. The low three bits are the same in both
    // configurations; the `is_neo` configuration adds a fourth bit.
    static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) {
        u32 pipe = 0;
        pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
        pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
        pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
        if (is_neo) {
            pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
        }
        return pipe;
    }

    // Bank index for 8 or 16 banks; any other bank count yields 0.
    // x and y are first shifted right by log2(bank_width * num_pipes) and log2(bank_height).
    static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) {
        // std::bit_width(v) is 1 + floor(log2(v)), so subtract one to obtain the log2 used
        // as a shift amount. `| 1u` keeps the result at 0 for v == 0 instead of wrapping.
        const auto floor_log2 = [](u32 v) { return static_cast<u32>(std::bit_width(v | 1u)) - 1u; };

        const u32 x_shift_offset = floor_log2(bank_width * num_pipes);
        const u32 y_shift_offset = floor_log2(bank_height);
        const u32 xs = x >> x_shift_offset;
        const u32 ys = y >> y_shift_offset;
        u32 bank = 0;
        switch (num_banks) {
        case 8:
            bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
            bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
            bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
            break;
        case 16:
            bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
            bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
            bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
            bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
            break;
        default:;
        }
        return bank;
    }

    // Returns the byte offset of pixel (x, y) inside the tiled buffer.
    u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const {
        u64 element_index = GetElementIndex(x, y);
        u64 pipe = GetPipeIndex(x, y, is_neo);
        u64 bank = GetBankIndex(x, y, 1, m_bank_height, m_num_banks, m_num_pipes);

        // Bytes in one 8x8 tile of 32-bit elements (256); element offsets are kept in bits.
        u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
        u64 element_offset = (element_index * 32);
        u64 tile_split_slice = 0;
        // Tile splitting only applies to tiles larger than 512 bytes, so with the value
        // above this branch is not taken and tile_split_slice stays 0.
        if (tile_bytes > 512) {
            tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
            element_offset %= (static_cast<u64>(512) * 8);
            tile_bytes = 512;
        }

        // Position of the enclosing macro tile (128 pixels wide, m_macro_tile_height tall).
        u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
        u64 macro_tiles_per_row = m_padded_width / 128;
        u64 macro_tile_row_index = y / m_macro_tile_height;
        u64 macro_tile_column_index = x / 128;
        u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
        u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
        u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
        u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
        u64 slice_offset = tile_split_slice * slice_bytes;

        // Tile row within the bank.
        u64 tile_row_index = (y / 8) % m_bank_height;
        u64 tile_index = tile_row_index;
        u64 tile_offset = tile_index * tile_bytes;

        u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
        bank ^= tile_split_slice_rotation;
        bank &= (m_num_banks - 1);

        // Assemble the address: the low 8 bits of the byte offset, then the pipe bits, then
        // the bank bits, then the remaining high bits of the offset.
        u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
        u64 bit_offset = total_offset & 0x7u;
        total_offset /= 8;
        u64 pipe_interleave_offset = total_offset & 0xffu;
        u64 offset = total_offset >> 8u;
        u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits));
        return ((byte_offset << 3u) | bit_offset) / 8;
    }
};
void ConvertTileToLinear(u08* dst, const u08* src,u32 width, u32 height, bool is_neo) {
const TileManager32 t{width, height, is_neo};
for (u32 y = 0; y < height; y++) {
u32 x = 0;
u64 linear_offset = y * width * 4;
for (; x + 1 < width; x += 2) {
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
linear_offset += sizeof(u64);
}
if (x < width) {
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
}
}
}
} // namespace VideoCore

View file

@ -0,0 +1,9 @@
#pragma once
#include "common/types.h"
namespace VideoCore {
// Converts a tiled image at `src` into a linear (row-major) image at `dst`.
// `width` and `height` are in pixels; `neo` selects the alternate tiling parameter set.
// NOTE(review): the implementation visible alongside this header copies 4 bytes per pixel —
// confirm callers only pass 32-bit surfaces.
void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo);
} // namespace VideoCore