Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-06-02 08:43:16 +00:00
renderer_vulkan: Commonize and adjust buffer bindings (#1412)
* shader_recompiler: Implement finite cmp class
* shader_recompiler: Implement more opcodes
* renderer_vulkan: Commonize buffer binding
* liverpool: More dma data impl
* fix
* copy_shader: Handle additional instructions from Knack
* translator: Add V_CMPX_GE_I32
parent 47ba6c6344
commit 87f8fea4de
23 changed files with 438 additions and 342 deletions
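
A note on the commit message's V_CMPX_GE_I32 bullet: in the GCN ISA the V_CMPX compare family writes its result mask to EXEC in addition to VCC, which is what distinguishes it from plain V_CMP_GE_I32. A per-lane scalar sketch of the assumed semantics (illustrative only; the recompiler emits IR, not this loop):

#include <bitset>
#include <cstdint>

// Illustrative model of V_CMPX_GE_I32 over a 64-lane wavefront: each active
// lane compares signed i32 operands and the result mask is written to both
// VCC and EXEC (inactive lanes produce 0).
void v_cmpx_ge_i32(const std::int32_t src0[64], const std::int32_t src1[64],
                   std::bitset<64>& vcc, std::bitset<64>& exec) {
    for (int lane = 0; lane < 64; ++lane) {
        const bool result = exec[lane] && (src0[lane] >= src1[lane]);
        vcc[lane] = result;
        exec[lane] = result;
    }
}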
src/video_core/buffer_cache/buffer.h
@@ -142,6 +142,7 @@ public:
     VAddr cpu_addr = 0;
     bool is_picked{};
     bool is_coherent{};
+    bool is_deleted{};
     int stream_score = 0;
     size_t size_bytes = 0;
     std::span<u8> mapped_data;
src/video_core/buffer_cache/buffer_cache.cpp
@@ -20,7 +20,7 @@ static constexpr size_t StagingBufferSize = 1_GB;
 static constexpr size_t UboStreamBufferSize = 64_MB;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
+                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
                          PageManager& tracker_)
     : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
       texture_cache{texture_cache_}, tracker{tracker_},
@@ -70,11 +70,10 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
 void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
     boost::container::small_vector<vk::BufferCopy, 1> copies;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
     memory_tracker.ForEachDownloadRange<true>(
         device_addr, size, [&](u64 device_addr_out, u64 range_size) {
             const VAddr buffer_addr = buffer.CpuAddr();
-            const auto add_download = [&](VAddr start, VAddr end, u64) {
+            const auto add_download = [&](VAddr start, VAddr end) {
                 const u64 new_offset = start - buffer_addr;
                 const u64 new_size = end - start;
                 copies.push_back(vk::BufferCopy{
@@ -82,12 +81,10 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
                     .dstOffset = total_size_bytes,
                     .size = new_size,
                 });
-                // Align up to avoid cache conflicts
-                constexpr u64 align = 64ULL;
-                constexpr u64 mask = ~(align - 1ULL);
-                total_size_bytes += (new_size + align - 1) & mask;
                 largest_copy = std::max(largest_copy, new_size);
+                total_size_bytes += new_size;
             };
             gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
             gpu_modified_ranges.Subtract(device_addr_out, range_size);
         });
     if (total_size_bytes == 0) {
         return;
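
For reference, the rounding this hunk removes is the usual align-up idiom: each copy size was padded to the next 64-byte multiple before being added to the running destination offset. A standalone sketch (hypothetical helper name, same arithmetic):

#include <cstdint>

// Hypothetical helper reproducing the removed arithmetic: round a size up
// to the next multiple of 64 bytes.
constexpr std::uint64_t AlignUp64(std::uint64_t size) {
    constexpr std::uint64_t align = 64ULL;
    constexpr std::uint64_t mask = ~(align - 1ULL);
    return (size + align - 1ULL) & mask;
}

static_assert(AlignUp64(1) == 64);
static_assert(AlignUp64(64) == 64);
static_assert(AlignUp64(65) == 128);

Dropping it makes the dstOffset packing exact, at the cost of the cache-line padding the old comment aimed for.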
@@ -181,6 +178,9 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
             .divisor = 1,
         });
     }
+    if (ranges.empty()) {
+        return false;
+    }
 
     std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
         return lhv.base_address < rhv.base_address;
@@ -269,48 +269,62 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
     return regs.num_indices;
 }
 
-void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) {
-    ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned");
+void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
+    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
+    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
+        memcpy(std::bit_cast<void*>(address), value, num_bytes);
+        return;
+    }
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
+    const Buffer* buffer = [&] {
+        if (is_gds) {
+            return &gds_buffer;
+        }
+        const BufferId buffer_id = FindBuffer(address, num_bytes);
+        return &slot_buffers[buffer_id];
+    }();
     const vk::BufferMemoryBarrier2 buf_barrier = {
         .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
         .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
         .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .buffer = gds_buffer.Handle(),
-        .offset = gds_offset,
-        .size = sizeof(u32),
+        .buffer = buffer->Handle(),
+        .offset = buffer->Offset(address),
+        .size = num_bytes,
     };
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
         .bufferMemoryBarrierCount = 1,
         .pBufferMemoryBarriers = &buf_barrier,
     });
-    cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value);
+    cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
 }
 
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
-                                                  bool is_texel_buffer) {
-    // For small uniform buffers that have not been modified by gpu
-    // use device local stream buffer to reduce renderpass breaks.
+                                                  bool is_texel_buffer, BufferId buffer_id) {
     static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
     const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
     if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
+        // For small uniform buffers that have not been modified by gpu
+        // use device local stream buffer to reduce renderpass breaks.
         const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
         return {&stream_buffer, offset};
     }
 
-    const BufferId buffer_id = FindBuffer(device_addr, size);
+    if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
+        buffer_id = FindBuffer(device_addr, size);
+    }
     Buffer& buffer = slot_buffers[buffer_id];
     SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
     if (is_written) {
         memory_tracker.MarkRegionAsGpuModified(device_addr, size);
         gpu_modified_ranges.Add(device_addr, size);
     }
     return {&buffer, buffer.Offset(device_addr)};
 }
 
-std::pair<Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
+std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) {
     const u64 page = gpu_addr >> CACHING_PAGEBITS;
     const BufferId buffer_id = page_table[page];
     if (buffer_id) {
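
Call sites migrate from the dword-only GDS helper to the pointer-plus-size form. A compilable mock of the new calling convention (MockBufferCache and the call site are assumptions for illustration; the real method records a Vulkan updateBuffer through the scheduler as shown above):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Stand-in that mimics only the CPU fast path of BufferCache::InlineData.
struct MockBufferCache {
    void InlineData(std::uintptr_t address, const void* value, std::uint32_t num_bytes,
                    bool is_gds) {
        if (!is_gds) {
            // Mirrors the diff's unregistered-region path: plain host memcpy.
            std::memcpy(reinterpret_cast<void*>(address), value, num_bytes);
        }
        // The is_gds path would record cmdbuf.updateBuffer against gds_buffer.
    }
};

int main() {
    MockBufferCache cache;
    std::uint32_t dst = 0;
    const std::uint32_t value = 0x1234;
    // Old API: InlineDataToGds(gds_offset, value) wrote exactly one dword to GDS.
    // New API: any byte count, GDS or ordinary memory, selected by is_gds.
    cache.InlineData(reinterpret_cast<std::uintptr_t>(&dst), &value, sizeof(value), false);
    std::printf("0x%x\n", static_cast<unsigned>(dst)); // 0x1234
    return 0;
}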
@@ -474,7 +488,7 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
     cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                            vk::PipelineStageFlagBits::eAllCommands,
                            vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
-    DeleteBuffer(overlap_id, true);
+    DeleteBuffer(overlap_id);
 }
 
 BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
@@ -529,7 +543,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer) {
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
     VAddr buffer_start = buffer.CpuAddr();
-    const auto add_copy = [&](VAddr device_addr_out, u64 range_size) {
+    memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
         copies.push_back(vk::BufferCopy{
             .srcOffset = total_size_bytes,
             .dstOffset = device_addr_out - buffer_start,
@@ -537,11 +551,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer) {
         });
         total_size_bytes += range_size;
         largest_copy = std::max(largest_copy, range_size);
-    };
-    memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
-        add_copy(device_addr_out, range_size);
-        // Prevent uploading to gpu modified regions.
-        // gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy);
     });
     SCOPE_EXIT {
         if (is_texel_buffer) {
@@ -654,14 +663,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
     return true;
 }
 
-void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
-    // Mark the whole buffer as CPU written to stop tracking CPU writes
-    if (!do_not_mark) {
-        Buffer& buffer = slot_buffers[buffer_id];
-        memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
-    }
+void BufferCache::DeleteBuffer(BufferId buffer_id) {
+    Buffer& buffer = slot_buffers[buffer_id];
     Unregister(buffer_id);
     scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); });
+    buffer.is_deleted = true;
 }
 
 } // namespace VideoCore
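
The reworked DeleteBuffer is one half of a handshake with the is_deleted flag added to Buffer and the revalidation in ObtainBuffer. A self-contained sketch of the pattern (the vector of closures stands in for scheduler.DeferOperation; all names here are simplifications):

#include <cstdio>
#include <functional>
#include <vector>

struct Buffer {
    bool is_deleted = false;
};

int main() {
    std::vector<std::function<void()>> deferred; // stand-in for scheduler.DeferOperation
    Buffer buffer;

    // DeleteBuffer: mark the slot stale immediately, free the storage later.
    buffer.is_deleted = true;
    deferred.push_back([] { std::puts("slot_buffers.erase(buffer_id)"); });

    // ObtainBuffer: a caller-supplied BufferId is revalidated against the flag.
    if (buffer.is_deleted) {
        std::puts("stale BufferId -> FindBuffer(device_addr, size)");
    }

    // Once the GPU is done with the buffer, the deferred erase runs.
    for (auto& op : deferred) {
        op();
    }
    return 0;
}

Marking before the deferred erase means any ObtainBuffer call issued in between falls back to FindBuffer instead of touching a dying slot.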
src/video_core/buffer_cache/buffer_cache.h
@@ -12,6 +12,7 @@
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
 #include "video_core/buffer_cache/memory_tracker_base.h"
+#include "video_core/buffer_cache/range_set.h"
 #include "video_core/multi_level_page_table.h"
 
 namespace AmdGpu {
@@ -53,7 +54,7 @@ public:
 
 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         const AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
+                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
                          PageManager& tracker);
     ~BufferCache();
 
@@ -80,15 +81,16 @@ public:
     /// Bind host index buffer for the current draw.
     u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
 
-    /// Writes a value to GDS buffer.
-    void InlineDataToGds(u32 gds_offset, u32 value);
+    /// Writes a value to GPU buffer.
+    void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
-                                                       bool is_texel_buffer = false);
+                                                       bool is_texel_buffer = false,
+                                                       BufferId buffer_id = {});
 
-    /// Obtains a temporary buffer for usage in texture cache.
-    [[nodiscard]] std::pair<Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
+    /// Attempts to obtain a buffer without modifying the cache contents.
+    [[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size);
 
     /// Return true when a region is registered on the cache
     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
@@ -99,6 +101,8 @@ public:
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
 
+    [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
+
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -119,8 +123,6 @@ private:
 
     void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
 
-    [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
-
     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
 
     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
@@ -138,11 +140,11 @@ private:
 
     bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
 
-    void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
+    void DeleteBuffer(BufferId buffer_id);
 
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
-    const AmdGpu::Liverpool* liverpool;
+    AmdGpu::Liverpool* liverpool;
     TextureCache& texture_cache;
     PageManager& tracker;
     StreamBuffer staging_buffer;
|
@ -150,6 +152,7 @@ private:
|
|||
Buffer gds_buffer;
|
||||
std::mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
vk::BufferView null_buffer_view;
|
||||
MemoryTracker memory_tracker;
|
||||
PageTable page_table;
|
||||
|