mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-30 23:33:17 +00:00
common: Rework timekeeping with native RDTSC and port to linux
This commit is contained in:
parent
acfa56f6bc
commit
fe43558779
37 changed files with 818 additions and 279 deletions
188
src/video_core/gpu_memory.cpp
Normal file
188
src/video_core/gpu_memory.cpp
Normal file
|
@ -0,0 +1,188 @@
|
|||
#include "gpu_memory.h"
|
||||
#include <atomic>
|
||||
#include <xxh3.h>
|
||||
|
||||
#include "common/singleton.h"
|
||||
|
||||
/// Single-range convenience wrapper around GPUMemory::memoryCreateObj.
///
/// Forwards one (virtual_addr, size) pair to the GPUMemory singleton as arrays of length 1.
/// The `todo` argument is not forwarded; nullptr is passed in its place.
///
/// @return whatever the GPUMemory member returns (nullptr when it finds no heap for the range).
void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/, u64 virtual_addr, u64 size,
                           const GPUObject& info) {
    constexpr int kRangeCount = 1;
    return Common::Singleton<GPUMemory>::Instance()->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size, kRangeCount, info);
}
|
||||
|
||||
/// Registers a new heap covering [virtual_addr, virtual_addr + size) with the GPUMemory singleton.
///
/// The heap record is appended to the singleton's heap list while its mutex is held.
void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) {
    auto* const manager = Common::Singleton<GPUMemory>::Instance();

    // Build the record first; only the list mutation needs the lock.
    MemoryHeap new_heap;
    new_heap.allocated_virtual_addr = virtual_addr;
    new_heap.allocated_size = size;

    std::scoped_lock guard{manager->m_mutex};
    manager->m_heaps.push_back(new_heap);
}
|
||||
|
||||
/// Returns the XXH3 64-bit hash of `size` bytes starting at `buf`.
/// A null buffer or a zero size yields 0 without calling the hash function.
u64 GPU::calculate_hash(const u08* buf, u64 size) {
    if (buf == nullptr || size == 0) {
        return 0;
    }
    return XXH3_64bits(buf, size);
}
|
||||
|
||||
/// Allocates Vulkan device memory that satisfies `mem->requirements` and `mem->property`.
///
/// Picks the first memory type of `ctx->m_physical_device` that is allowed by
/// `mem->requirements.memoryTypeBits` and exposes every flag in `mem->property`, then calls
/// vkAllocateMemory on `ctx->m_device`.
///
/// On a successful type match `mem->type`, `mem->offset` (reset to 0) and `mem->unique_id`
/// (a process-wide increasing counter) are written before the allocation is attempted.
///
/// @return true when vkAllocateMemory returned VK_SUCCESS; false when the allocation failed or
///         when no memory type matched (in which case `mem` is left untouched).
bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) {
    static std::atomic_uint64_t unique_id = 0;

    VkPhysicalDeviceMemoryProperties memory_properties{};
    vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties);

    u32 index = 0;
    for (; index < memory_properties.memoryTypeCount; index++) {
        const bool type_allowed = (mem->requirements.memoryTypeBits & (static_cast<uint32_t>(1) << index)) != 0;
        const bool has_properties = (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property;
        if (type_allowed && has_properties) {
            break;
        }
    }

    // No suitable memory type: `index` is out of range here, and handing it to
    // vkAllocateMemory as memoryTypeIndex would be invalid usage.
    if (index >= memory_properties.memoryTypeCount) {
        return false;
    }

    mem->type = index;
    mem->offset = 0;

    VkMemoryAllocateInfo alloc_info{};
    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    alloc_info.pNext = nullptr;
    alloc_info.allocationSize = mem->requirements.size;
    alloc_info.memoryTypeIndex = index;

    mem->unique_id = ++unique_id;

    const auto result = vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory);
    return result == VK_SUCCESS;
}
|
||||
|
||||
/// Asks the GPUMemory singleton to flush every heap (see GPUMemory::flushAllHeaps).
void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) {
    Common::Singleton<GPUMemory>::Instance()->flushAllHeaps(ctx);
}
|
||||
|
||||
/// Finds the first registered heap that contains either end of the given range.
///
/// A heap matches when the first byte (`virtual_addr`) or the last byte
/// (`virtual_addr + size - 1`) lies inside [allocated_virtual_addr, allocated_virtual_addr + allocated_size).
///
/// @return index of the matching heap in m_heaps, or -1 when none matches.
int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) {
    const u64 last_addr = virtual_addr + size - 1;

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int id = 0; id < heap_count; ++id) {
        const u64 heap_begin = m_heaps[id].allocated_virtual_addr;
        const u64 heap_end = heap_begin + m_heaps[id].allocated_size;
        const auto inside = [heap_begin, heap_end](u64 addr) { return addr >= heap_begin && addr < heap_end; };

        if (inside(virtual_addr) || inside(last_addr)) {
            return id;
        }
    }
    return -1;
}
|
||||
|
||||
/// Creates a GPU object backed by up to N guest memory ranges and records it in the owning heap.
///
/// The heap is chosen from the first range only (`virtual_addr[0]`, `size[0]`). The object's
/// parameters, optional content hashes, and submit id are stored in an ObjInfo; the object itself
/// is produced by `info.getCreateFunc()` and later refreshed through `info.getUpdateFunc()`.
///
/// @param submit_id        submit id stored with the object (compared against in update()).
/// @param ctx              graphics context handed to the create function.
/// @param todo             unused placeholder.
/// @param virtual_addr     array of `virtual_addr_num` range start addresses.
/// @param size             array of `virtual_addr_num` range sizes.
/// @param virtual_addr_num number of ranges; must be between 1 and the capacity of ObjInfo::hash.
/// @param info             object description (parameters, type, hash flag, create/update callbacks).
/// @return the pointer returned by the create function, or nullptr when the arguments are
///         invalid or no registered heap contains the first range.
void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo, const u64* virtual_addr, const u64* size,
                                      int virtual_addr_num, const GPUObject& info) {
    // Lock and search this instance, the same one whose m_heaps is indexed below
    // (flushAllHeaps() uses the members directly as well).
    std::scoped_lock lock{m_mutex};

    ObjInfo objInfo = {};

    // The per-object hash/address arrays have a fixed capacity; a larger (or non-positive)
    // range count would index past them here and again in update().
    const int max_ranges = static_cast<int>(std::size(objInfo.hash));
    if (virtual_addr == nullptr || size == nullptr || virtual_addr_num <= 0 || virtual_addr_num > max_ranges) {
        return nullptr;
    }

    const int heap_id = getHeapId(virtual_addr[0], size[0]);
    if (heap_id < 0) {
        return nullptr;
    }
    auto& heap = m_heaps[heap_id];

    // Copy parameters from info to obj
    for (int i = 0; i < 8; i++) {
        objInfo.obj_params[i] = info.obj_params[i];
    }

    objInfo.gpu_object.objectType = info.objectType;
    objInfo.gpu_object.obj = nullptr;

    // Snapshot the current content hash of every range so update() can detect changes.
    for (int h = 0; h < virtual_addr_num; h++) {
        if (info.check_hash) {
            objInfo.hash[h] = GPU::calculate_hash(reinterpret_cast<const u08*>(virtual_addr[h]), size[h]);
        } else {
            objInfo.hash[h] = 0;
        }
    }
    objInfo.submit_id = submit_id;
    objInfo.check_hash = info.check_hash;

    objInfo.gpu_object.obj = info.getCreateFunc()(ctx, objInfo.obj_params, virtual_addr, size, virtual_addr_num, &objInfo.mem);

    objInfo.update_func = info.getUpdateFunc();
    const int index = static_cast<int>(heap.objects.size());

    HeapObject hobj{};
    hobj.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, index);
    hobj.info = objInfo;
    hobj.free = false;
    heap.objects.push_back(hobj);

    return objInfo.gpu_object.obj;
}
|
||||
|
||||
/// Builds a HeapBlock describing the given guest memory ranges.
///
/// Copies up to `virtual_addr_num` (address, size) pairs into the block; the count is clamped
/// to the block's fixed array capacity so the copy can never run past it.
/// `heap_id` and `obj_id` are accepted for interface compatibility but are not used.
///
/// @return the populated block, with virtual_addr_num set to the number of ranges stored.
GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, [[maybe_unused]] int heap_id,
                                               [[maybe_unused]] int obj_id) {
    GPU::HeapBlock heapBlock{};

    // Clamp to [0, capacity]: negative counts store nothing, oversized counts are truncated.
    const int capacity = static_cast<int>(std::size(heapBlock.size));
    int count = virtual_addr_num;
    if (count < 0) {
        count = 0;
    } else if (count > capacity) {
        count = capacity;
    }

    heapBlock.virtual_addr_num = count;
    for (int vi = 0; vi < count; vi++) {
        heapBlock.virtual_addr[vi] = virtual_addr[vi];
        heapBlock.size[vi] = size[vi];
    }
    return heapBlock;
}
|
||||
|
||||
/// Re-hashes one heap object and invokes its update callback if any range changed.
///
/// Nothing happens unless `submit_id` is greater than the id stored with the object.
/// Otherwise each range is hashed (or treated as 0 when hashing is disabled for the object),
/// the stored hashes are refreshed, and the stored submit id is replaced by `submit_id`
/// unless `submit_id` is UINT64_MAX. The update callback runs only when a hash differed.
void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) {
    auto& target = m_heaps[heap_id].objects[obj_id];
    auto& block = target.block;
    auto& state = target.info;

    if (!(submit_id > state.submit_id)) {
        return;
    }

    bool changed = false;
    for (int i = 0; i < block.virtual_addr_num; i++) {
        uint64_t fresh = 0;
        if (state.check_hash) {
            fresh = GPU::calculate_hash(reinterpret_cast<const uint8_t*>(block.virtual_addr[i]), block.size[i]);
        }
        if (state.hash[i] != fresh) {
            state.hash[i] = fresh;
            changed = true;
        }
    }

    if (submit_id != UINT64_MAX) {
        state.submit_id = submit_id;
    }

    if (!changed) {
        return;
    }
    state.update_func(ctx, state.obj_params, state.gpu_object.obj, block.virtual_addr, block.size, block.virtual_addr_num);
}
|
||||
|
||||
/// Runs update() with submit id UINT64_MAX on every object that is not marked free,
/// across all heaps, while holding m_mutex.
void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) {
    std::scoped_lock guard{m_mutex};

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int heap_id = 0; heap_id < heap_count; ++heap_id) {
        const int object_count = static_cast<int>(m_heaps[heap_id].objects.size());
        for (int obj_id = 0; obj_id < object_count; ++obj_id) {
            if (m_heaps[heap_id].objects[obj_id].free) {
                continue;
            }
            update(UINT64_MAX, ctx, heap_id, obj_id);
        }
    }
}
|
86
src/video_core/gpu_memory.h
Normal file
86
src/video_core/gpu_memory.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
#include <array>
#include <mutex>
#include <vector>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class GPUObject;
|
||||
|
||||
/// Access mode for a memory object. Read and Write occupy separate bits and
/// ReadWrite is their combination (values 0, 1, 2, 3).
enum class MemoryMode : u32 {
    NoAccess = 0,
    Read = 1u << 0,
    Write = 1u << 1,
    ReadWrite = Read | Write,
};
|
||||
|
||||
/// Kind of object stored in GPU memory; Invalid is the zero value.
enum class MemoryObjectType : u64 {
    Invalid = 0,
    VideoOutBuffer = 1,
};
|
||||
|
||||
struct GpuMemoryObject {
|
||||
MemoryObjectType object_type = MemoryObjectType::Invalid;
|
||||
void* obj = nullptr;
|
||||
};
|
||||
|
||||
struct HeapBlock {
|
||||
std::array<u64, 3> virtual_address{};
|
||||
std::array<u64, 3> size{};
|
||||
u32 virtual_addr_num = 0;
|
||||
};
|
||||
|
||||
class GPUObject {
|
||||
public:
|
||||
GPUObject() = default;
|
||||
virtual ~GPUObject() = default;
|
||||
u64 obj_params[8] = {};
|
||||
bool check_hash = false;
|
||||
bool isReadOnly = false;
|
||||
MemoryObjectType objectType = MemoryObjectType::Invalid;
|
||||
};
|
||||
|
||||
struct ObjInfo {
|
||||
std::array<u64, 8> obj_params{};
|
||||
GpuMemoryObject gpu_object;
|
||||
std::array<u64, 3> hash{};
|
||||
u64 submit_id = 0;
|
||||
bool check_hash = false;
|
||||
};
|
||||
|
||||
struct HeapObject {
|
||||
HeapBlock block;
|
||||
ObjInfo info;
|
||||
bool free = true;
|
||||
};
|
||||
struct MemoryHeap {
|
||||
u64 allocated_virtual_addr = 0;
|
||||
u64 allocated_size = 0;
|
||||
std::vector<HeapObject> objects;
|
||||
};
|
||||
|
||||
class GPUMemory {
|
||||
public:
|
||||
GPUMemory() {}
|
||||
virtual ~GPUMemory() {}
|
||||
int getHeapId(u64 vaddr, u64 size);
|
||||
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr,
|
||||
const u64* size, int virtual_addr_num, const GPUObject& info);
|
||||
HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id);
|
||||
void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id);
|
||||
void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx);
|
||||
|
||||
private:
|
||||
std::mutex m_mutex;
|
||||
std::vector<MemoryHeap> m_heaps;
|
||||
};
|
||||
|
||||
void memorySetAllocArea(u64 virtual_addr, u64 size);
|
||||
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size,
|
||||
const GPUObject& info);
|
||||
u64 calculate_hash(const u08* buf, u64 size);
|
||||
bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem);
|
||||
void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx);
|
||||
|
||||
} // namespace VideoCore
|
151
src/video_core/tile_manager.cpp
Normal file
151
src/video_core/tile_manager.cpp
Normal file
|
@ -0,0 +1,151 @@
|
|||
#include <bit>
|
||||
#include <cstring>
|
||||
#include "video_core/tile_manager.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class TileManager32 {
|
||||
public:
|
||||
u32 m_macro_tile_height = 0;
|
||||
u32 m_bank_height = 0;
|
||||
u32 m_num_banks = 0;
|
||||
u32 m_num_pipes = 0;
|
||||
u32 m_padded_width = 0;
|
||||
u32 m_padded_height = 0;
|
||||
u32 m_pipe_bits = 0;
|
||||
u32 m_bank_bits = 0;
|
||||
|
||||
TileManager32(u32 width, u32 height, bool is_neo) {
|
||||
m_macro_tile_height = (is_neo ? 128 : 64);
|
||||
m_bank_height = is_neo ? 2 : 1;
|
||||
m_num_banks = is_neo ? 8 : 16;
|
||||
m_num_pipes = is_neo ? 16 : 8;
|
||||
m_padded_width = width;
|
||||
if (height == 1080) {
|
||||
m_padded_height = is_neo ? 1152 : 1088;
|
||||
}
|
||||
if (height == 720) {
|
||||
m_padded_height = 768;
|
||||
}
|
||||
m_pipe_bits = is_neo ? 4 : 3;
|
||||
m_bank_bits = is_neo ? 3 : 4;
|
||||
}
|
||||
|
||||
static u32 GetElementIndex(u32 x, u32 y) {
|
||||
u32 elem = 0;
|
||||
elem |= ((x >> 0u) & 0x1u) << 0u;
|
||||
elem |= ((x >> 1u) & 0x1u) << 1u;
|
||||
elem |= ((y >> 0u) & 0x1u) << 2u;
|
||||
elem |= ((x >> 2u) & 0x1u) << 3u;
|
||||
elem |= ((y >> 1u) & 0x1u) << 4u;
|
||||
elem |= ((y >> 2u) & 0x1u) << 5u;
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) {
|
||||
u32 pipe = 0;
|
||||
|
||||
if (!is_neo) {
|
||||
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
|
||||
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
|
||||
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
|
||||
} else {
|
||||
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
|
||||
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
|
||||
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
|
||||
pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
|
||||
}
|
||||
|
||||
return pipe;
|
||||
}
|
||||
|
||||
static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) {
|
||||
const u32 x_shift_offset = std::bit_width(bank_width * num_pipes);
|
||||
const u32 y_shift_offset = std::bit_width(bank_height);
|
||||
const u32 xs = x >> x_shift_offset;
|
||||
const u32 ys = y >> y_shift_offset;
|
||||
u32 bank = 0;
|
||||
switch (num_banks) {
|
||||
case 8:
|
||||
bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
|
||||
bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
|
||||
bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
|
||||
break;
|
||||
case 16:
|
||||
bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
|
||||
bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
|
||||
bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
|
||||
bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const {
|
||||
u64 element_index = GetElementIndex(x, y);
|
||||
|
||||
u32 xh = x;
|
||||
u32 yh = y;
|
||||
u64 pipe = GetPipeIndex(xh, yh, is_neo);
|
||||
u64 bank = GetBankIndex(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);
|
||||
u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
|
||||
u64 element_offset = (element_index * 32);
|
||||
u64 tile_split_slice = 0;
|
||||
|
||||
if (tile_bytes > 512) {
|
||||
tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
|
||||
element_offset %= (static_cast<u64>(512) * 8);
|
||||
tile_bytes = 512;
|
||||
}
|
||||
|
||||
u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
|
||||
u64 macro_tiles_per_row = m_padded_width / 128;
|
||||
u64 macro_tile_row_index = y / m_macro_tile_height;
|
||||
u64 macro_tile_column_index = x / 128;
|
||||
u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
|
||||
u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
|
||||
u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
|
||||
u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
|
||||
u64 slice_offset = tile_split_slice * slice_bytes;
|
||||
u64 tile_row_index = (y / 8) % m_bank_height;
|
||||
u64 tile_index = tile_row_index;
|
||||
u64 tile_offset = tile_index * tile_bytes;
|
||||
|
||||
u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
|
||||
bank ^= tile_split_slice_rotation;
|
||||
bank &= (m_num_banks - 1);
|
||||
|
||||
u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
|
||||
u64 bit_offset = total_offset & 0x7u;
|
||||
total_offset /= 8;
|
||||
|
||||
u64 pipe_interleave_offset = total_offset & 0xffu;
|
||||
u64 offset = total_offset >> 8u;
|
||||
u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits));
|
||||
|
||||
return ((byte_offset << 3u) | bit_offset) / 8;
|
||||
}
|
||||
};
|
||||
|
||||
void ConvertTileToLinear(u08* dst, const u08* src,u32 width, u32 height, bool is_neo) {
|
||||
const TileManager32 t{width, height, is_neo};
|
||||
for (u32 y = 0; y < height; y++) {
|
||||
u32 x = 0;
|
||||
u64 linear_offset = y * width * 4;
|
||||
|
||||
for (; x + 1 < width; x += 2) {
|
||||
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
|
||||
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
|
||||
linear_offset += sizeof(u64);
|
||||
}
|
||||
if (x < width) {
|
||||
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
|
||||
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
9
src/video_core/tile_manager.h
Normal file
9
src/video_core/tile_manager.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Converts a tiled surface into a linear (row-major) one.
/// The implementation visible in tile_manager.cpp treats every element as 4 bytes, so `dst`
/// receives `width * height * 4` bytes read from `src`; `neo` selects the tiling configuration.
void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo);
|
||||
|
||||
} // namespace VideoCore
|
Loading…
Add table
Add a link
Reference in a new issue