Merge branch 'master' into ssbo-align
This commit is contained in:
commit
1d11fe00a3
409 changed files with 37059 additions and 27474 deletions
|
@ -4,7 +4,7 @@
|
|||
add_subdirectory(host_shaders)
|
||||
|
||||
if(LIBVA_FOUND)
|
||||
set_source_files_properties(host1x/codecs/codec.cpp
|
||||
set_source_files_properties(host1x/ffmpeg/ffmpeg.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
|
||||
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
|
||||
endif()
|
||||
|
@ -15,6 +15,7 @@ add_library(video_core STATIC
|
|||
buffer_cache/buffer_cache.cpp
|
||||
buffer_cache/buffer_cache.h
|
||||
buffer_cache/memory_tracker_base.h
|
||||
buffer_cache/usage_tracker.h
|
||||
buffer_cache/word_manager.h
|
||||
cache_types.h
|
||||
cdma_pusher.cpp
|
||||
|
@ -66,6 +67,8 @@ add_library(video_core STATIC
|
|||
host1x/codecs/vp9.cpp
|
||||
host1x/codecs/vp9.h
|
||||
host1x/codecs/vp9_types.h
|
||||
host1x/ffmpeg/ffmpeg.cpp
|
||||
host1x/ffmpeg/ffmpeg.h
|
||||
host1x/control.cpp
|
||||
host1x/control.h
|
||||
host1x/host1x.cpp
|
||||
|
|
|
@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
|
|||
if (!channel_state) {
|
||||
return;
|
||||
}
|
||||
runtime.TickFrame(slot_buffers);
|
||||
|
||||
// Calculate hits and shots and move hit bits to the right
|
||||
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
|
||||
|
@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
|||
for (const IntervalType& add_interval : tmp_intervals) {
|
||||
common_ranges.add(add_interval);
|
||||
}
|
||||
runtime.CopyBuffer(dest_buffer, src_buffer, copies);
|
||||
const auto& copy = copies[0];
|
||||
src_buffer.MarkUsage(copy.src_offset, copy.size);
|
||||
dest_buffer.MarkUsage(copy.dst_offset, copy.size);
|
||||
runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
|
||||
if (has_new_downloads) {
|
||||
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
|
||||
}
|
||||
|
@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
|
|||
common_ranges.subtract(subtract_interval);
|
||||
|
||||
const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
|
||||
auto& dest_buffer = slot_buffers[buffer];
|
||||
Buffer& dest_buffer = slot_buffers[buffer];
|
||||
const u32 offset = dest_buffer.Offset(*cpu_dst_address);
|
||||
runtime.ClearBuffer(dest_buffer, offset, size, value);
|
||||
dest_buffer.MarkUsage(offset, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
|||
VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
|
||||
const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
|
||||
async_downloads += std::make_pair(base_interval, 1);
|
||||
buffer.MarkUsage(copy.src_offset, copy.size);
|
||||
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
|
||||
normalized_copies.push_back(second_copy);
|
||||
}
|
||||
|
@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
|||
// Have in mind the staging buffer offset for the copy
|
||||
copy.dst_offset += download_staging.offset;
|
||||
const std::array copies{copy};
|
||||
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
|
||||
false);
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
buffer.MarkUsage(copy.src_offset, copy.size);
|
||||
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
|
||||
}
|
||||
runtime.PostCopyBarrier();
|
||||
runtime.Finish();
|
||||
|
@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
|||
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
|
||||
std::memcpy(upload_staging.mapped_span.data(),
|
||||
draw_state.inline_index_draw_indexes.data(), size);
|
||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
|
||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
|
||||
} else {
|
||||
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
|
||||
}
|
||||
|
@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
|||
offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
|
||||
runtime.BindIndexBuffer(buffer, new_offset, size);
|
||||
} else {
|
||||
buffer.MarkUsage(offset, size);
|
||||
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
|
||||
draw_state.index_buffer.first, draw_state.index_buffer.count,
|
||||
buffer, offset, size);
|
||||
|
@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
|||
|
||||
const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
buffer.MarkUsage(offset, binding.size);
|
||||
|
||||
host_bindings.buffers.push_back(&buffer);
|
||||
host_bindings.offsets.push_back(offset);
|
||||
|
@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
|||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
}
|
||||
buffer.MarkUsage(offset, size);
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
|
||||
} else {
|
||||
|
@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
|||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
buffer.MarkUsage(offset, size);
|
||||
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
|
||||
|
||||
if (is_written) {
|
||||
|
@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
|||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
buffer.MarkUsage(offset, size);
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
|
@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
|
|||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
buffer.MarkUsage(offset, size);
|
||||
host_bindings.buffers.push_back(&buffer);
|
||||
host_bindings.offsets.push_back(offset);
|
||||
host_bindings.sizes.push_back(binding.size);
|
||||
host_bindings.sizes.push_back(size);
|
||||
}
|
||||
if (host_bindings.buffers.size() > 0) {
|
||||
runtime.BindTransformFeedbackBuffers(host_bindings);
|
||||
|
@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
|
|||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
buffer.MarkUsage(offset, size);
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
|
||||
++binding_index;
|
||||
|
@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
|
|||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
buffer.MarkUsage(offset, size);
|
||||
const bool is_written =
|
||||
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
|
||||
|
||||
|
@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
|
|||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
buffer.MarkUsage(offset, size);
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
|
@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
|||
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
|
||||
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
||||
channel_state->vertex_buffers[index] = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = FindBuffer(*cpu_addr, size),
|
||||
.buffer_id = buffer_id,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1192,11 +1209,6 @@ void BufferCache<P>::UpdateDrawIndirect() {
|
|||
.size = static_cast<u32>(size),
|
||||
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
|
||||
};
|
||||
VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64);
|
||||
VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64);
|
||||
IntervalType interval{cpu_addr_start, cpu_addr_end};
|
||||
ClearDownload(interval);
|
||||
common_ranges.subtract(interval);
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32),
|
||||
|
@ -1406,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
|||
.dst_offset = dst_base_offset,
|
||||
.size = overlap.SizeBytes(),
|
||||
});
|
||||
runtime.CopyBuffer(new_buffer, overlap, copies);
|
||||
new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
|
||||
runtime.CopyBuffer(new_buffer, overlap, copies, true);
|
||||
DeleteBuffer(overlap_id, true);
|
||||
}
|
||||
|
||||
|
@ -1419,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
|
|||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
||||
auto& new_buffer = slot_buffers[new_buffer_id];
|
||||
runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
|
||||
const size_t size_bytes = new_buffer.SizeBytes();
|
||||
runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
|
||||
new_buffer.MarkUsage(0, size_bytes);
|
||||
for (const BufferId overlap_id : overlap.ids) {
|
||||
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
||||
}
|
||||
|
@ -1472,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
|
|||
|
||||
template <class P>
|
||||
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||
return SynchronizeBufferImpl(buffer, cpu_addr, size);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||
boost::container::small_vector<BufferCopy, 4> copies;
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
|
@ -1498,51 +1508,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
|
|||
return false;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||
boost::container::small_vector<BufferCopy, 4> copies;
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
IntervalSet found_sets{};
|
||||
auto make_copies = [&] {
|
||||
for (auto& interval : found_sets) {
|
||||
const std::size_t sub_size = interval.upper() - interval.lower();
|
||||
const VAddr cpu_addr_ = interval.lower();
|
||||
copies.push_back(BufferCopy{
|
||||
.src_offset = total_size_bytes,
|
||||
.dst_offset = cpu_addr_ - buffer.CpuAddr(),
|
||||
.size = sub_size,
|
||||
});
|
||||
total_size_bytes += sub_size;
|
||||
largest_copy = std::max<u64>(largest_copy, sub_size);
|
||||
}
|
||||
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
|
||||
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
|
||||
};
|
||||
memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
|
||||
const VAddr base_adr = cpu_addr_out;
|
||||
const VAddr end_adr = base_adr + range_size;
|
||||
const IntervalType add_interval{base_adr, end_adr};
|
||||
found_sets.add(add_interval);
|
||||
});
|
||||
if (found_sets.empty()) {
|
||||
return true;
|
||||
}
|
||||
const IntervalType search_interval{cpu_addr, cpu_addr + size};
|
||||
auto it = common_ranges.lower_bound(search_interval);
|
||||
auto it_end = common_ranges.upper_bound(search_interval);
|
||||
if (it == common_ranges.end()) {
|
||||
make_copies();
|
||||
return false;
|
||||
}
|
||||
while (it != it_end) {
|
||||
found_sets.subtract(*it);
|
||||
it++;
|
||||
}
|
||||
make_copies();
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||
std::span<BufferCopy> copies) {
|
||||
|
@ -1591,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
|
|||
// Apply the staging offset
|
||||
copy.src_offset += upload_staging.offset;
|
||||
}
|
||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
|
||||
const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
|
||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1633,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
|
|||
}};
|
||||
u8* const src_pointer = upload_staging.mapped_span.data();
|
||||
std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
|
||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
|
||||
const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
|
||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
|
||||
} else {
|
||||
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
|
||||
}
|
||||
|
@ -1686,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
|
|||
for (BufferCopy& copy : copies) {
|
||||
// Modify copies to have the staging offset in mind
|
||||
copy.dst_offset += download_staging.offset;
|
||||
buffer.MarkUsage(copy.src_offset, copy.size);
|
||||
}
|
||||
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
|
||||
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
|
||||
runtime.Finish();
|
||||
for (const BufferCopy& copy : copies) {
|
||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
||||
|
|
|
@ -529,10 +529,6 @@ private:
|
|||
|
||||
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||
|
||||
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||
|
||||
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||
|
||||
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||
std::span<BufferCopy> copies);
|
||||
|
||||
|
|
79
src/video_core/buffer_cache/usage_tracker.h
Normal file
79
src/video_core/buffer_cache/usage_tracker.h
Normal file
|
@ -0,0 +1,79 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
class UsageTracker {
|
||||
static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
|
||||
static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
|
||||
static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
|
||||
|
||||
public:
|
||||
explicit UsageTracker(size_t size) {
|
||||
const size_t num_pages = (size >> PAGE_SHIFT) + 1;
|
||||
pages.resize(num_pages, 0ULL);
|
||||
}
|
||||
|
||||
void Reset() noexcept {
|
||||
std::ranges::fill(pages, 0ULL);
|
||||
}
|
||||
|
||||
void Track(u64 offset, u64 size) noexcept {
|
||||
const size_t page = offset >> PAGE_SHIFT;
|
||||
const size_t page_end = (offset + size) >> PAGE_SHIFT;
|
||||
TrackPage(page, offset, size);
|
||||
if (page == page_end) {
|
||||
return;
|
||||
}
|
||||
for (size_t i = page + 1; i < page_end; i++) {
|
||||
pages[i] = ~u64{0};
|
||||
}
|
||||
const size_t offset_end = offset + size;
|
||||
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
|
||||
TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
|
||||
const size_t page = offset >> PAGE_SHIFT;
|
||||
const size_t page_end = (offset + size) >> PAGE_SHIFT;
|
||||
if (IsPageUsed(page, offset, size)) {
|
||||
return true;
|
||||
}
|
||||
for (size_t i = page + 1; i < page_end; i++) {
|
||||
if (pages[i] != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
const size_t offset_end = offset + size;
|
||||
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
|
||||
return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
|
||||
}
|
||||
|
||||
private:
|
||||
void TrackPage(u64 page, u64 offset, u64 size) noexcept {
|
||||
const size_t offset_in_page = offset % PAGE_BYTES;
|
||||
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
|
||||
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
|
||||
const size_t mask = ~u64{0} >> (64 - num_bits);
|
||||
pages[page] |= (~u64{0} & mask) << first_bit;
|
||||
}
|
||||
|
||||
bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
|
||||
const size_t offset_in_page = offset % PAGE_BYTES;
|
||||
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
|
||||
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
|
||||
const size_t mask = ~u64{0} >> (64 - num_bits);
|
||||
const size_t mask2 = (~u64{0} & mask) << first_bit;
|
||||
return (pages[page] & mask2) != 0;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<u64> pages;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -72,7 +72,7 @@ void Fermi2D::Blit() {
|
|||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
constexpr s64 null_derivate = 1ULL << 32;
|
||||
constexpr s64 null_derivative = 1ULL << 32;
|
||||
Surface src = regs.src;
|
||||
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
||||
const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
|
||||
|
@ -89,7 +89,7 @@ void Fermi2D::Blit() {
|
|||
.operation = regs.operation,
|
||||
.filter = args.sample_mode.filter,
|
||||
.must_accelerate =
|
||||
args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu,
|
||||
args.du_dx != null_derivative || args.dv_dy != null_derivative || delegate_to_gpu,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
|
|
|
@ -268,7 +268,7 @@ size_t Maxwell3D::EstimateIndexBufferSize() {
|
|||
std::numeric_limits<u32>::max()};
|
||||
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
|
||||
const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
|
||||
const size_t cap{GetMaxCurrentVertices() * 3 * byte_size};
|
||||
const size_t cap{GetMaxCurrentVertices() * 4 * byte_size};
|
||||
const size_t lower_cap =
|
||||
std::min<size_t>(static_cast<size_t>(end_address - start_address), cap);
|
||||
return std::min<size_t>(
|
||||
|
|
|
@ -86,10 +86,7 @@ public:
|
|||
uncommitted_operations.emplace_back(std::move(func));
|
||||
}
|
||||
pending_operations.emplace_back(std::move(uncommitted_operations));
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
QueueFence(new_fence);
|
||||
}
|
||||
QueueFence(new_fence);
|
||||
if (!delay_fence) {
|
||||
func();
|
||||
}
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/host1x/codecs/codec.h"
|
||||
#include "video_core/host1x/codecs/h264.h"
|
||||
|
@ -14,242 +10,17 @@
|
|||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
extern "C" {
|
||||
#include <libavfilter/buffersink.h>
|
||||
#include <libavfilter/buffersrc.h>
|
||||
#include <libavutil/opt.h>
|
||||
#ifdef LIBVA_FOUND
|
||||
// for querying VAAPI driver information
|
||||
#include <libavutil/hwcontext_vaapi.h>
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
namespace {
|
||||
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
|
||||
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
|
||||
constexpr std::array PREFERRED_GPU_DECODERS = {
|
||||
AV_HWDEVICE_TYPE_CUDA,
|
||||
#ifdef _WIN32
|
||||
AV_HWDEVICE_TYPE_D3D11VA,
|
||||
AV_HWDEVICE_TYPE_DXVA2,
|
||||
#elif defined(__unix__)
|
||||
AV_HWDEVICE_TYPE_VAAPI,
|
||||
AV_HWDEVICE_TYPE_VDPAU,
|
||||
#endif
|
||||
// last resort for Linux Flatpak (w/ NVIDIA)
|
||||
AV_HWDEVICE_TYPE_VULKAN,
|
||||
};
|
||||
|
||||
void AVPacketDeleter(AVPacket* ptr) {
|
||||
av_packet_free(&ptr);
|
||||
}
|
||||
|
||||
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
|
||||
|
||||
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
|
||||
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
|
||||
if (*p == av_codec_ctx->pix_fmt) {
|
||||
return av_codec_ctx->pix_fmt;
|
||||
}
|
||||
}
|
||||
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
|
||||
av_buffer_unref(&av_codec_ctx->hw_device_ctx);
|
||||
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
|
||||
return PREFERRED_CPU_FMT;
|
||||
}
|
||||
|
||||
// List all the currently available hwcontext in ffmpeg
|
||||
std::vector<AVHWDeviceType> ListSupportedContexts() {
|
||||
std::vector<AVHWDeviceType> contexts{};
|
||||
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
|
||||
do {
|
||||
current_device_type = av_hwdevice_iterate_types(current_device_type);
|
||||
contexts.push_back(current_device_type);
|
||||
} while (current_device_type != AV_HWDEVICE_TYPE_NONE);
|
||||
return contexts;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void AVFrameDeleter(AVFrame* ptr) {
|
||||
av_frame_free(&ptr);
|
||||
}
|
||||
|
||||
Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
|
||||
: host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
|
||||
vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
|
||||
vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
|
||||
|
||||
Codec::~Codec() {
|
||||
if (!initialized) {
|
||||
return;
|
||||
}
|
||||
// Free libav memory
|
||||
avcodec_free_context(&av_codec_ctx);
|
||||
av_buffer_unref(&av_gpu_decoder);
|
||||
|
||||
if (filters_initialized) {
|
||||
avfilter_graph_free(&av_filter_graph);
|
||||
}
|
||||
}
|
||||
|
||||
bool Codec::CreateGpuAvDevice() {
|
||||
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
|
||||
static const auto supported_contexts = ListSupportedContexts();
|
||||
for (const auto& type : PREFERRED_GPU_DECODERS) {
|
||||
if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
|
||||
[&type](const auto& context) { return context == type; })) {
|
||||
LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
|
||||
continue;
|
||||
}
|
||||
// Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
|
||||
av_buffer_unref(&av_gpu_decoder);
|
||||
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
|
||||
if (hwdevice_res < 0) {
|
||||
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
|
||||
av_hwdevice_get_type_name(type), hwdevice_res);
|
||||
continue;
|
||||
}
|
||||
#ifdef LIBVA_FOUND
|
||||
if (type == AV_HWDEVICE_TYPE_VAAPI) {
|
||||
// we need to determine if this is an impersonated VAAPI driver
|
||||
AVHWDeviceContext* hwctx =
|
||||
static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
|
||||
AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
|
||||
const char* vendor_name = vaQueryVendorString(vactx->display);
|
||||
if (strstr(vendor_name, "VDPAU backend")) {
|
||||
// VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
|
||||
LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
|
||||
continue;
|
||||
} else {
|
||||
// according to some user testing, certain vaapi driver (Intel?) could be buggy
|
||||
// so let's log the driver name which may help the developers/supporters
|
||||
LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (int i = 0;; i++) {
|
||||
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
|
||||
if (!config) {
|
||||
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
|
||||
av_codec->name, av_hwdevice_get_type_name(type));
|
||||
break;
|
||||
}
|
||||
if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
|
||||
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
|
||||
av_codec_ctx->pix_fmt = config->pix_fmt;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Codec::InitializeAvCodecContext() {
|
||||
av_codec_ctx = avcodec_alloc_context3(av_codec);
|
||||
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
|
||||
av_codec_ctx->thread_count = 0;
|
||||
av_codec_ctx->thread_type &= ~FF_THREAD_FRAME;
|
||||
}
|
||||
|
||||
void Codec::InitializeGpuDecoder() {
|
||||
if (!CreateGpuAvDevice()) {
|
||||
av_buffer_unref(&av_gpu_decoder);
|
||||
return;
|
||||
}
|
||||
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
|
||||
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
|
||||
av_codec_ctx->hw_device_ctx = hw_device_ctx;
|
||||
av_codec_ctx->get_format = GetGpuFormat;
|
||||
}
|
||||
|
||||
void Codec::InitializeAvFilters(AVFrame* frame) {
|
||||
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
|
||||
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
|
||||
AVFilterInOut* inputs = avfilter_inout_alloc();
|
||||
AVFilterInOut* outputs = avfilter_inout_alloc();
|
||||
SCOPE_EXIT({
|
||||
avfilter_inout_free(&inputs);
|
||||
avfilter_inout_free(&outputs);
|
||||
});
|
||||
|
||||
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
|
||||
// so just use 1/1 to make buffer filter happy
|
||||
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width,
|
||||
frame->height, frame->format);
|
||||
|
||||
av_filter_graph = avfilter_graph_alloc();
|
||||
int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(),
|
||||
nullptr, av_filter_graph);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr,
|
||||
av_filter_graph);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
inputs->name = av_strdup("out");
|
||||
inputs->filter_ctx = av_filter_sink_ctx;
|
||||
inputs->pad_idx = 0;
|
||||
inputs->next = nullptr;
|
||||
|
||||
outputs->name = av_strdup("in");
|
||||
outputs->filter_ctx = av_filter_src_ctx;
|
||||
outputs->pad_idx = 0;
|
||||
outputs->next = nullptr;
|
||||
|
||||
const char* description = "yadif=1:-1:0";
|
||||
ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_config(av_filter_graph, nullptr);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
filters_initialized = true;
|
||||
}
|
||||
Codec::~Codec() = default;
|
||||
|
||||
void Codec::Initialize() {
|
||||
const AVCodecID codec = [&] {
|
||||
switch (current_codec) {
|
||||
case Host1x::NvdecCommon::VideoCodec::H264:
|
||||
return AV_CODEC_ID_H264;
|
||||
case Host1x::NvdecCommon::VideoCodec::VP8:
|
||||
return AV_CODEC_ID_VP8;
|
||||
case Host1x::NvdecCommon::VideoCodec::VP9:
|
||||
return AV_CODEC_ID_VP9;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
|
||||
return AV_CODEC_ID_NONE;
|
||||
}
|
||||
}();
|
||||
av_codec = avcodec_find_decoder(codec);
|
||||
|
||||
InitializeAvCodecContext();
|
||||
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
|
||||
InitializeGpuDecoder();
|
||||
}
|
||||
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
|
||||
avcodec_free_context(&av_codec_ctx);
|
||||
av_buffer_unref(&av_gpu_decoder);
|
||||
return;
|
||||
}
|
||||
if (!av_codec_ctx->hw_device_ctx) {
|
||||
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
|
||||
}
|
||||
initialized = true;
|
||||
initialized = decode_api.Initialize(current_codec);
|
||||
}
|
||||
|
||||
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
|
||||
|
@ -264,14 +35,18 @@ void Codec::Decode() {
|
|||
if (is_first_frame) {
|
||||
Initialize();
|
||||
}
|
||||
|
||||
if (!initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Assemble bitstream.
|
||||
bool vp9_hidden_frame = false;
|
||||
const auto& frame_data = [&]() {
|
||||
size_t configuration_size = 0;
|
||||
const auto packet_data = [&]() {
|
||||
switch (current_codec) {
|
||||
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
|
||||
return h264_decoder->ComposeFrame(state, is_first_frame);
|
||||
return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
|
||||
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
|
||||
return vp8_decoder->ComposeFrame(state);
|
||||
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
|
||||
|
@ -283,89 +58,35 @@ void Codec::Decode() {
|
|||
return std::span<const u8>{};
|
||||
}
|
||||
}();
|
||||
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
|
||||
if (!packet) {
|
||||
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
|
||||
|
||||
// Send assembled bitstream to decoder.
|
||||
if (!decode_api.SendPacket(packet_data, configuration_size)) {
|
||||
return;
|
||||
}
|
||||
packet->data = const_cast<u8*>(frame_data.data());
|
||||
packet->size = static_cast<s32>(frame_data.size());
|
||||
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
|
||||
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
|
||||
return;
|
||||
}
|
||||
// Only receive/store visible frames
|
||||
|
||||
// Only receive/store visible frames.
|
||||
if (vp9_hidden_frame) {
|
||||
return;
|
||||
}
|
||||
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
|
||||
AVFramePtr final_frame{nullptr, AVFrameDeleter};
|
||||
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
|
||||
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
|
||||
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
|
||||
return;
|
||||
}
|
||||
if (initial_frame->width == 0 || initial_frame->height == 0) {
|
||||
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
|
||||
return;
|
||||
}
|
||||
bool is_interlaced = initial_frame->interlaced_frame != 0;
|
||||
if (av_codec_ctx->hw_device_ctx) {
|
||||
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
|
||||
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
|
||||
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
|
||||
// because Intel drivers crash unless using AV_PIX_FMT_NV12
|
||||
final_frame->format = PREFERRED_GPU_FMT;
|
||||
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
|
||||
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
|
||||
} else {
|
||||
final_frame = std::move(initial_frame);
|
||||
}
|
||||
if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
|
||||
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
|
||||
return;
|
||||
}
|
||||
if (!is_interlaced) {
|
||||
av_frames.push(std::move(final_frame));
|
||||
} else {
|
||||
if (!filters_initialized) {
|
||||
InitializeAvFilters(final_frame.get());
|
||||
}
|
||||
if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(),
|
||||
AV_BUFFERSRC_FLAG_KEEP_REF);
|
||||
ret) {
|
||||
LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret);
|
||||
return;
|
||||
}
|
||||
while (true) {
|
||||
auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
|
||||
|
||||
int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get());
|
||||
// Receive output frames from decoder.
|
||||
decode_api.ReceiveFrames(frames);
|
||||
|
||||
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF))
|
||||
break;
|
||||
if (ret < 0) {
|
||||
LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
av_frames.push(std::move(filter_frame));
|
||||
}
|
||||
}
|
||||
while (av_frames.size() > 10) {
|
||||
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
|
||||
av_frames.pop();
|
||||
while (frames.size() > 10) {
|
||||
LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
|
||||
frames.pop();
|
||||
}
|
||||
}
|
||||
|
||||
AVFramePtr Codec::GetCurrentFrame() {
|
||||
std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
|
||||
// Sometimes VIC will request more frames than have been decoded.
|
||||
// in this case, return a nullptr and don't overwrite previous frame data
|
||||
if (av_frames.empty()) {
|
||||
return AVFramePtr{nullptr, AVFrameDeleter};
|
||||
// in this case, return a blank frame and don't overwrite previous data.
|
||||
if (frames.empty()) {
|
||||
return {};
|
||||
}
|
||||
AVFramePtr frame = std::move(av_frames.front());
|
||||
av_frames.pop();
|
||||
|
||||
auto frame = std::move(frames.front());
|
||||
frames.pop();
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
|
|
@ -4,28 +4,15 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <queue>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/host1x/ffmpeg/ffmpeg.h"
|
||||
#include "video_core/host1x/nvdec_common.h"
|
||||
|
||||
extern "C" {
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#endif
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/avfilter.h>
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
void AVFrameDeleter(AVFrame* ptr);
|
||||
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
|
||||
|
||||
namespace Decoder {
|
||||
class H264;
|
||||
class VP8;
|
||||
|
@ -51,7 +38,7 @@ public:
|
|||
void Decode();
|
||||
|
||||
/// Returns next decoded frame
|
||||
[[nodiscard]] AVFramePtr GetCurrentFrame();
|
||||
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame();
|
||||
|
||||
/// Returns the value of current_codec
|
||||
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
|
||||
|
@ -60,25 +47,9 @@ public:
|
|||
[[nodiscard]] std::string_view GetCurrentCodecName() const;
|
||||
|
||||
private:
|
||||
void InitializeAvCodecContext();
|
||||
|
||||
void InitializeAvFilters(AVFrame* frame);
|
||||
|
||||
void InitializeGpuDecoder();
|
||||
|
||||
bool CreateGpuAvDevice();
|
||||
|
||||
bool initialized{};
|
||||
bool filters_initialized{};
|
||||
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
|
||||
|
||||
const AVCodec* av_codec{nullptr};
|
||||
AVCodecContext* av_codec_ctx{nullptr};
|
||||
AVBufferRef* av_gpu_decoder{nullptr};
|
||||
|
||||
AVFilterContext* av_filter_src_ctx{nullptr};
|
||||
AVFilterContext* av_filter_sink_ctx{nullptr};
|
||||
AVFilterGraph* av_filter_graph{nullptr};
|
||||
FFmpeg::DecodeApi decode_api;
|
||||
|
||||
Host1x::Host1x& host1x;
|
||||
const Host1x::NvdecCommon::NvdecRegisters& state;
|
||||
|
@ -86,7 +57,7 @@ private:
|
|||
std::unique_ptr<Decoder::VP8> vp8_decoder;
|
||||
std::unique_ptr<Decoder::VP9> vp9_decoder;
|
||||
|
||||
std::queue<AVFramePtr> av_frames{};
|
||||
std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
|
@ -30,7 +30,7 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
|
|||
H264::~H264() = default;
|
||||
|
||||
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
|
||||
bool is_first_frame) {
|
||||
size_t* out_configuration_size, bool is_first_frame) {
|
||||
H264DecoderContext context;
|
||||
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
|
||||
sizeof(H264DecoderContext));
|
||||
|
@ -39,6 +39,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
|
|||
if (!is_first_frame && frame_number != 0) {
|
||||
frame.resize_destructive(context.stream_len);
|
||||
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
|
||||
*out_configuration_size = 0;
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -157,6 +158,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
|
|||
frame.resize(encoded_header.size() + context.stream_len);
|
||||
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
|
||||
|
||||
*out_configuration_size = encoded_header.size();
|
||||
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
|
||||
frame.data() + encoded_header.size(), context.stream_len);
|
||||
|
||||
|
|
|
@ -67,6 +67,7 @@ public:
|
|||
|
||||
/// Compose the H264 frame for FFmpeg decoding
|
||||
[[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
|
||||
size_t* out_configuration_size,
|
||||
bool is_first_frame = false);
|
||||
|
||||
private:
|
||||
|
|
419
src/video_core/host1x/ffmpeg/ffmpeg.cpp
Normal file
419
src/video_core/host1x/ffmpeg/ffmpeg.cpp
Normal file
|
@ -0,0 +1,419 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/host1x/ffmpeg/ffmpeg.h"
|
||||
|
||||
extern "C" {
|
||||
#ifdef LIBVA_FOUND
|
||||
// for querying VAAPI driver information
|
||||
#include <libavutil/hwcontext_vaapi.h>
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace FFmpeg {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
|
||||
constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
|
||||
constexpr std::array PreferredGpuDecoders = {
|
||||
AV_HWDEVICE_TYPE_CUDA,
|
||||
#ifdef _WIN32
|
||||
AV_HWDEVICE_TYPE_D3D11VA,
|
||||
AV_HWDEVICE_TYPE_DXVA2,
|
||||
#elif defined(__unix__)
|
||||
AV_HWDEVICE_TYPE_VAAPI,
|
||||
AV_HWDEVICE_TYPE_VDPAU,
|
||||
#endif
|
||||
// last resort for Linux Flatpak (w/ NVIDIA)
|
||||
AV_HWDEVICE_TYPE_VULKAN,
|
||||
};
|
||||
|
||||
AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
|
||||
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
|
||||
if (*p == codec_context->pix_fmt) {
|
||||
return codec_context->pix_fmt;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU");
|
||||
av_buffer_unref(&codec_context->hw_device_ctx);
|
||||
|
||||
codec_context->pix_fmt = PreferredCpuFormat;
|
||||
return codec_context->pix_fmt;
|
||||
}
|
||||
|
||||
std::string AVError(int errnum) {
|
||||
char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
|
||||
av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
|
||||
return errbuf;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Packet::Packet(std::span<const u8> data) {
|
||||
m_packet = av_packet_alloc();
|
||||
m_packet->data = const_cast<u8*>(data.data());
|
||||
m_packet->size = static_cast<s32>(data.size());
|
||||
}
|
||||
|
||||
Packet::~Packet() {
|
||||
av_packet_free(&m_packet);
|
||||
}
|
||||
|
||||
Frame::Frame() {
|
||||
m_frame = av_frame_alloc();
|
||||
}
|
||||
|
||||
Frame::~Frame() {
|
||||
av_frame_free(&m_frame);
|
||||
}
|
||||
|
||||
Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
|
||||
const AVCodecID av_codec = [&] {
|
||||
switch (codec) {
|
||||
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
|
||||
return AV_CODEC_ID_H264;
|
||||
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
|
||||
return AV_CODEC_ID_VP8;
|
||||
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
|
||||
return AV_CODEC_ID_VP9;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unknown codec {}", codec);
|
||||
return AV_CODEC_ID_NONE;
|
||||
}
|
||||
}();
|
||||
|
||||
m_codec = avcodec_find_decoder(av_codec);
|
||||
}
|
||||
|
||||
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
|
||||
for (int i = 0;; i++) {
|
||||
const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
|
||||
if (!config) {
|
||||
LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
|
||||
av_hwdevice_get_type_name(type));
|
||||
break;
|
||||
}
|
||||
if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
|
||||
config->device_type == type) {
|
||||
LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
|
||||
*out_pix_fmt = config->pix_fmt;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
|
||||
std::vector<AVHWDeviceType> types;
|
||||
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
|
||||
|
||||
while (true) {
|
||||
current_device_type = av_hwdevice_iterate_types(current_device_type);
|
||||
if (current_device_type == AV_HWDEVICE_TYPE_NONE) {
|
||||
return types;
|
||||
}
|
||||
|
||||
types.push_back(current_device_type);
|
||||
}
|
||||
}
|
||||
|
||||
HardwareContext::~HardwareContext() {
|
||||
av_buffer_unref(&m_gpu_decoder);
|
||||
}
|
||||
|
||||
bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
|
||||
const Decoder& decoder) {
|
||||
const auto supported_types = GetSupportedDeviceTypes();
|
||||
for (const auto type : PreferredGpuDecoders) {
|
||||
AVPixelFormat hw_pix_fmt;
|
||||
|
||||
if (std::ranges::find(supported_types, type) == supported_types.end()) {
|
||||
LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!this->InitializeWithType(type)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
|
||||
decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
|
||||
av_buffer_unref(&m_gpu_decoder);
|
||||
|
||||
if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
|
||||
ret < 0) {
|
||||
LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
|
||||
AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef LIBVA_FOUND
|
||||
if (type == AV_HWDEVICE_TYPE_VAAPI) {
|
||||
// We need to determine if this is an impersonated VAAPI driver.
|
||||
auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
|
||||
auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
|
||||
const char* vendor_name = vaQueryVendorString(vactx->display);
|
||||
if (strstr(vendor_name, "VDPAU backend")) {
|
||||
// VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
|
||||
LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
|
||||
return false;
|
||||
} else {
|
||||
// According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
|
||||
// Log the driver name just in case.
|
||||
LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
DecoderContext::DecoderContext(const Decoder& decoder) {
|
||||
m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
|
||||
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
|
||||
m_codec_context->thread_count = 0;
|
||||
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
|
||||
}
|
||||
|
||||
DecoderContext::~DecoderContext() {
|
||||
av_buffer_unref(&m_codec_context->hw_device_ctx);
|
||||
avcodec_free_context(&m_codec_context);
|
||||
}
|
||||
|
||||
void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context,
|
||||
AVPixelFormat hw_pix_fmt) {
|
||||
m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
|
||||
m_codec_context->get_format = GetGpuFormat;
|
||||
m_codec_context->pix_fmt = hw_pix_fmt;
|
||||
}
|
||||
|
||||
bool DecoderContext::OpenContext(const Decoder& decoder) {
|
||||
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!m_codec_context->hw_device_ctx) {
|
||||
LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DecoderContext::SendPacket(const Packet& packet) {
|
||||
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
|
||||
auto dst_frame = std::make_unique<Frame>();
|
||||
|
||||
const auto ReceiveImpl = [&](AVFrame* frame) {
|
||||
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
*out_is_interlaced = frame->interlaced_frame != 0;
|
||||
return true;
|
||||
};
|
||||
|
||||
if (m_codec_context->hw_device_ctx) {
|
||||
// If we have a hardware context, make a separate frame here to receive the
|
||||
// hardware result before sending it to the output.
|
||||
Frame intermediate_frame;
|
||||
|
||||
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
|
||||
return {};
|
||||
}
|
||||
|
||||
dst_frame->SetFormat(PreferredGpuFormat);
|
||||
if (const int ret =
|
||||
av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
|
||||
ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
} else {
|
||||
// Otherwise, decode the frame as normal.
|
||||
if (!ReceiveImpl(dst_frame->GetFrame())) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
return dst_frame;
|
||||
}
|
||||
|
||||
DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
|
||||
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
|
||||
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
|
||||
AVFilterInOut* inputs = avfilter_inout_alloc();
|
||||
AVFilterInOut* outputs = avfilter_inout_alloc();
|
||||
SCOPE_EXIT({
|
||||
avfilter_inout_free(&inputs);
|
||||
avfilter_inout_free(&outputs);
|
||||
});
|
||||
|
||||
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
|
||||
// so just use 1/1 to make buffer filter happy
|
||||
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
|
||||
frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
|
||||
|
||||
m_filter_graph = avfilter_graph_alloc();
|
||||
int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
|
||||
nullptr, m_filter_graph);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
|
||||
m_filter_graph);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
inputs->name = av_strdup("out");
|
||||
inputs->filter_ctx = m_sink_context;
|
||||
inputs->pad_idx = 0;
|
||||
inputs->next = nullptr;
|
||||
|
||||
outputs->name = av_strdup("in");
|
||||
outputs->filter_ctx = m_source_context;
|
||||
outputs->pad_idx = 0;
|
||||
outputs->next = nullptr;
|
||||
|
||||
const char* description = "yadif=1:-1:0";
|
||||
ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_config(m_filter_graph, nullptr);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
m_initialized = true;
|
||||
}
|
||||
|
||||
bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
|
||||
if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
|
||||
AV_BUFFERSRC_FLAG_KEEP_REF);
|
||||
ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
|
||||
auto dst_frame = std::make_unique<Frame>();
|
||||
const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
|
||||
|
||||
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
|
||||
return dst_frame;
|
||||
}
|
||||
|
||||
DeinterlaceFilter::~DeinterlaceFilter() {
|
||||
avfilter_graph_free(&m_filter_graph);
|
||||
}
|
||||
|
||||
void DecodeApi::Reset() {
|
||||
m_deinterlace_filter.reset();
|
||||
m_hardware_context.reset();
|
||||
m_decoder_context.reset();
|
||||
m_decoder.reset();
|
||||
}
|
||||
|
||||
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
|
||||
this->Reset();
|
||||
m_decoder.emplace(codec);
|
||||
m_decoder_context.emplace(*m_decoder);
|
||||
|
||||
// Enable GPU decoding if requested.
|
||||
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
|
||||
m_hardware_context.emplace();
|
||||
m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
|
||||
}
|
||||
|
||||
// Open the decoder context.
|
||||
if (!m_decoder_context->OpenContext(*m_decoder)) {
|
||||
this->Reset();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
|
||||
FFmpeg::Packet packet(packet_data);
|
||||
return m_decoder_context->SendPacket(packet);
|
||||
}
|
||||
|
||||
void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
|
||||
// Receive raw frame from decoder.
|
||||
bool is_interlaced;
|
||||
auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
|
||||
if (!frame) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!is_interlaced) {
|
||||
// If the frame is not interlaced, we can pend it now.
|
||||
frame_queue.push(std::move(frame));
|
||||
} else {
|
||||
// Create the deinterlacer if needed.
|
||||
if (!m_deinterlace_filter) {
|
||||
m_deinterlace_filter.emplace(*frame);
|
||||
}
|
||||
|
||||
// Add the frame we just received.
|
||||
if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Pend output fields.
|
||||
while (true) {
|
||||
auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
|
||||
if (!filter_frame) {
|
||||
break;
|
||||
}
|
||||
|
||||
frame_queue.push(std::move(filter_frame));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace FFmpeg
|
213
src/video_core/host1x/ffmpeg/ffmpeg.h
Normal file
213
src/video_core/host1x/ffmpeg/ffmpeg.h
Normal file
|
@ -0,0 +1,213 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/host1x/nvdec_common.h"
|
||||
|
||||
extern "C" {
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#endif
|
||||
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/avfilter.h>
|
||||
#include <libavfilter/buffersink.h>
|
||||
#include <libavfilter/buffersrc.h>
|
||||
#include <libavutil/avutil.h>
|
||||
#include <libavutil/opt.h>
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace FFmpeg {
|
||||
|
||||
class Packet;
|
||||
class Frame;
|
||||
class Decoder;
|
||||
class HardwareContext;
|
||||
class DecoderContext;
|
||||
class DeinterlaceFilter;
|
||||
|
||||
// Wraps an AVPacket, a container for compressed bitstream data.
|
||||
class Packet {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(Packet);
|
||||
YUZU_NON_MOVEABLE(Packet);
|
||||
|
||||
explicit Packet(std::span<const u8> data);
|
||||
~Packet();
|
||||
|
||||
AVPacket* GetPacket() const {
|
||||
return m_packet;
|
||||
}
|
||||
|
||||
private:
|
||||
AVPacket* m_packet{};
|
||||
};
|
||||
|
||||
// Wraps an AVFrame, a container for audio and video stream data.
|
||||
class Frame {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(Frame);
|
||||
YUZU_NON_MOVEABLE(Frame);
|
||||
|
||||
explicit Frame();
|
||||
~Frame();
|
||||
|
||||
int GetWidth() const {
|
||||
return m_frame->width;
|
||||
}
|
||||
|
||||
int GetHeight() const {
|
||||
return m_frame->height;
|
||||
}
|
||||
|
||||
AVPixelFormat GetPixelFormat() const {
|
||||
return static_cast<AVPixelFormat>(m_frame->format);
|
||||
}
|
||||
|
||||
int GetStride(int plane) const {
|
||||
return m_frame->linesize[plane];
|
||||
}
|
||||
|
||||
int* GetStrides() const {
|
||||
return m_frame->linesize;
|
||||
}
|
||||
|
||||
u8* GetData(int plane) const {
|
||||
return m_frame->data[plane];
|
||||
}
|
||||
|
||||
u8** GetPlanes() const {
|
||||
return m_frame->data;
|
||||
}
|
||||
|
||||
void SetFormat(int format) {
|
||||
m_frame->format = format;
|
||||
}
|
||||
|
||||
AVFrame* GetFrame() const {
|
||||
return m_frame;
|
||||
}
|
||||
|
||||
private:
|
||||
AVFrame* m_frame{};
|
||||
};
|
||||
|
||||
// Wraps an AVCodec, a type containing information about a codec.
|
||||
class Decoder {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(Decoder);
|
||||
YUZU_NON_MOVEABLE(Decoder);
|
||||
|
||||
explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec);
|
||||
~Decoder() = default;
|
||||
|
||||
bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const;
|
||||
|
||||
const AVCodec* GetCodec() const {
|
||||
return m_codec;
|
||||
}
|
||||
|
||||
private:
|
||||
const AVCodec* m_codec{};
|
||||
};
|
||||
|
||||
// Wraps AVBufferRef for an accelerated decoder.
|
||||
class HardwareContext {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(HardwareContext);
|
||||
YUZU_NON_MOVEABLE(HardwareContext);
|
||||
|
||||
static std::vector<AVHWDeviceType> GetSupportedDeviceTypes();
|
||||
|
||||
explicit HardwareContext() = default;
|
||||
~HardwareContext();
|
||||
|
||||
bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder);
|
||||
|
||||
AVBufferRef* GetBufferRef() const {
|
||||
return m_gpu_decoder;
|
||||
}
|
||||
|
||||
private:
|
||||
bool InitializeWithType(AVHWDeviceType type);
|
||||
|
||||
AVBufferRef* m_gpu_decoder{};
|
||||
};
|
||||
|
||||
// Wraps an AVCodecContext.
|
||||
class DecoderContext {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(DecoderContext);
|
||||
YUZU_NON_MOVEABLE(DecoderContext);
|
||||
|
||||
explicit DecoderContext(const Decoder& decoder);
|
||||
~DecoderContext();
|
||||
|
||||
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
|
||||
bool OpenContext(const Decoder& decoder);
|
||||
bool SendPacket(const Packet& packet);
|
||||
std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
|
||||
|
||||
AVCodecContext* GetCodecContext() const {
|
||||
return m_codec_context;
|
||||
}
|
||||
|
||||
private:
|
||||
AVCodecContext* m_codec_context{};
|
||||
};
|
||||
|
||||
// Wraps an AVFilterGraph.
|
||||
class DeinterlaceFilter {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(DeinterlaceFilter);
|
||||
YUZU_NON_MOVEABLE(DeinterlaceFilter);
|
||||
|
||||
explicit DeinterlaceFilter(const Frame& frame);
|
||||
~DeinterlaceFilter();
|
||||
|
||||
bool AddSourceFrame(const Frame& frame);
|
||||
std::unique_ptr<Frame> DrainSinkFrame();
|
||||
|
||||
private:
|
||||
AVFilterGraph* m_filter_graph{};
|
||||
AVFilterContext* m_source_context{};
|
||||
AVFilterContext* m_sink_context{};
|
||||
bool m_initialized{};
|
||||
};
|
||||
|
||||
class DecodeApi {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(DecodeApi);
|
||||
YUZU_NON_MOVEABLE(DecodeApi);
|
||||
|
||||
DecodeApi() = default;
|
||||
~DecodeApi() = default;
|
||||
|
||||
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
|
||||
void Reset();
|
||||
|
||||
bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
|
||||
void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
|
||||
|
||||
private:
|
||||
std::optional<FFmpeg::Decoder> m_decoder;
|
||||
std::optional<FFmpeg::DecoderContext> m_decoder_context;
|
||||
std::optional<FFmpeg::HardwareContext> m_hardware_context;
|
||||
std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
|
||||
};
|
||||
|
||||
} // namespace FFmpeg
|
|
@ -28,7 +28,7 @@ void Nvdec::ProcessMethod(u32 method, u32 argument) {
|
|||
}
|
||||
}
|
||||
|
||||
AVFramePtr Nvdec::GetFrame() {
|
||||
std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() {
|
||||
return codec->GetCurrentFrame();
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ public:
|
|||
void ProcessMethod(u32 method, u32 argument);
|
||||
|
||||
/// Return most recently decoded frame
|
||||
[[nodiscard]] AVFramePtr GetFrame();
|
||||
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame();
|
||||
|
||||
private:
|
||||
/// Invoke codec to decode a frame
|
||||
|
|
|
@ -82,27 +82,26 @@ void Vic::Execute() {
|
|||
return;
|
||||
}
|
||||
const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)};
|
||||
const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
|
||||
const auto* frame = frame_ptr.get();
|
||||
auto frame = nvdec_processor->GetFrame();
|
||||
if (!frame) {
|
||||
return;
|
||||
}
|
||||
const u64 surface_width = config.surface_width_minus1 + 1;
|
||||
const u64 surface_height = config.surface_height_minus1 + 1;
|
||||
if (static_cast<u64>(frame->width) != surface_width ||
|
||||
static_cast<u64>(frame->height) != surface_height) {
|
||||
if (static_cast<u64>(frame->GetWidth()) != surface_width ||
|
||||
static_cast<u64>(frame->GetHeight()) != surface_height) {
|
||||
// TODO: Properly support multiple video streams with differing frame dimensions
|
||||
LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
|
||||
frame->width, frame->height, surface_width, surface_height);
|
||||
frame->GetWidth(), frame->GetHeight(), surface_width, surface_height);
|
||||
}
|
||||
switch (config.pixel_format) {
|
||||
case VideoPixelFormat::RGBA8:
|
||||
case VideoPixelFormat::BGRA8:
|
||||
case VideoPixelFormat::RGBX8:
|
||||
WriteRGBFrame(frame, config);
|
||||
WriteRGBFrame(std::move(frame), config);
|
||||
break;
|
||||
case VideoPixelFormat::YUV420:
|
||||
WriteYUVFrame(frame, config);
|
||||
WriteYUVFrame(std::move(frame), config);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
|
||||
|
@ -110,10 +109,14 @@ void Vic::Execute() {
|
|||
}
|
||||
}
|
||||
|
||||
void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
|
||||
void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
|
||||
LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
|
||||
|
||||
if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
|
||||
const auto frame_width = frame->GetWidth();
|
||||
const auto frame_height = frame->GetHeight();
|
||||
const auto frame_format = frame->GetPixelFormat();
|
||||
|
||||
if (!scaler_ctx || frame_width != scaler_width || frame_height != scaler_height) {
|
||||
const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
|
||||
switch (pixel_format) {
|
||||
case VideoPixelFormat::RGBA8:
|
||||
|
@ -129,27 +132,26 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
|
|||
|
||||
sws_freeContext(scaler_ctx);
|
||||
// Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
|
||||
scaler_ctx = sws_getContext(frame->width, frame->height,
|
||||
static_cast<AVPixelFormat>(frame->format), frame->width,
|
||||
frame->height, target_format, 0, nullptr, nullptr, nullptr);
|
||||
scaler_width = frame->width;
|
||||
scaler_height = frame->height;
|
||||
scaler_ctx = sws_getContext(frame_width, frame_height, frame_format, frame_width,
|
||||
frame_height, target_format, 0, nullptr, nullptr, nullptr);
|
||||
scaler_width = frame_width;
|
||||
scaler_height = frame_height;
|
||||
converted_frame_buffer.reset();
|
||||
}
|
||||
if (!converted_frame_buffer) {
|
||||
const size_t frame_size = frame->width * frame->height * 4;
|
||||
const size_t frame_size = frame_width * frame_height * 4;
|
||||
converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
|
||||
}
|
||||
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
|
||||
const std::array<int, 4> converted_stride{frame_width * 4, frame_height * 4, 0, 0};
|
||||
u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
|
||||
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
|
||||
converted_stride.data());
|
||||
sws_scale(scaler_ctx, frame->GetPlanes(), frame->GetStrides(), 0, frame_height,
|
||||
&converted_frame_buf_addr, converted_stride.data());
|
||||
|
||||
// Use the minimum of surface/frame dimensions to avoid buffer overflow.
|
||||
const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
|
||||
const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
|
||||
const u32 width = std::min(surface_width, static_cast<u32>(frame->width));
|
||||
const u32 height = std::min(surface_height, static_cast<u32>(frame->height));
|
||||
const u32 width = std::min(surface_width, static_cast<u32>(frame_width));
|
||||
const u32 height = std::min(surface_height, static_cast<u32>(frame_height));
|
||||
const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
|
||||
if (blk_kind != 0) {
|
||||
// swizzle pitch linear to block linear
|
||||
|
@ -169,23 +171,23 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
|
|||
}
|
||||
}
|
||||
|
||||
void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
|
||||
void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
|
||||
LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
|
||||
|
||||
const std::size_t surface_width = config.surface_width_minus1 + 1;
|
||||
const std::size_t surface_height = config.surface_height_minus1 + 1;
|
||||
const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
|
||||
// Use the minimum of surface/frame dimensions to avoid buffer overflow.
|
||||
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
|
||||
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
|
||||
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->GetWidth()));
|
||||
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->GetHeight()));
|
||||
|
||||
const auto stride = static_cast<size_t>(frame->linesize[0]);
|
||||
const auto stride = static_cast<size_t>(frame->GetStride(0));
|
||||
|
||||
luma_buffer.resize_destructive(aligned_width * surface_height);
|
||||
chroma_buffer.resize_destructive(aligned_width * surface_height / 2);
|
||||
|
||||
// Populate luma buffer
|
||||
const u8* luma_src = frame->data[0];
|
||||
const u8* luma_src = frame->GetData(0);
|
||||
for (std::size_t y = 0; y < frame_height; ++y) {
|
||||
const std::size_t src = y * stride;
|
||||
const std::size_t dst = y * aligned_width;
|
||||
|
@ -196,16 +198,16 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
|
|||
|
||||
// Chroma
|
||||
const std::size_t half_height = frame_height / 2;
|
||||
const auto half_stride = static_cast<size_t>(frame->linesize[1]);
|
||||
const auto half_stride = static_cast<size_t>(frame->GetStride(1));
|
||||
|
||||
switch (frame->format) {
|
||||
switch (frame->GetPixelFormat()) {
|
||||
case AV_PIX_FMT_YUV420P: {
|
||||
// Frame from FFmpeg software
|
||||
// Populate chroma buffer from both channels with interleaving.
|
||||
const std::size_t half_width = frame_width / 2;
|
||||
u8* chroma_buffer_data = chroma_buffer.data();
|
||||
const u8* chroma_b_src = frame->data[1];
|
||||
const u8* chroma_r_src = frame->data[2];
|
||||
const u8* chroma_b_src = frame->GetData(1);
|
||||
const u8* chroma_r_src = frame->GetData(2);
|
||||
for (std::size_t y = 0; y < half_height; ++y) {
|
||||
const std::size_t src = y * half_stride;
|
||||
const std::size_t dst = y * aligned_width;
|
||||
|
@ -219,7 +221,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
|
|||
case AV_PIX_FMT_NV12: {
|
||||
// Frame from VA-API hardware
|
||||
// This is already interleaved so just copy
|
||||
const u8* chroma_src = frame->data[1];
|
||||
const u8* chroma_src = frame->GetData(1);
|
||||
for (std::size_t y = 0; y < half_height; ++y) {
|
||||
const std::size_t src = y * stride;
|
||||
const std::size_t dst = y * aligned_width;
|
||||
|
|
|
@ -39,9 +39,9 @@ public:
|
|||
private:
|
||||
void Execute();
|
||||
|
||||
void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
|
||||
void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
|
||||
|
||||
void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
|
||||
void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
|
||||
|
||||
Host1x& host1x;
|
||||
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
|
||||
|
|
|
@ -266,7 +266,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
|||
return;
|
||||
}
|
||||
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
|
||||
UNREACHABLE();
|
||||
ASSERT(false);
|
||||
return;
|
||||
}
|
||||
query_base->value += streamer->GetAmmendValue();
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "common/alignment.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_null/null_rasterizer.h"
|
||||
|
@ -99,8 +100,14 @@ bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
}
|
||||
void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {}
|
||||
void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {}
|
||||
void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {}
|
||||
void RasterizerNull::ReleaseChannel(s32 channel_id) {}
|
||||
void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {
|
||||
CreateChannel(channel);
|
||||
}
|
||||
void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {
|
||||
BindToChannel(channel.bind_id);
|
||||
}
|
||||
void RasterizerNull::ReleaseChannel(s32 channel_id) {
|
||||
EraseChannel(channel_id);
|
||||
}
|
||||
|
||||
} // namespace Null
|
||||
|
|
|
@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
|
|||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool) {
|
||||
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool) {
|
||||
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PreCopyBarrier() {
|
||||
|
|
|
@ -30,6 +30,8 @@ public:
|
|||
|
||||
void MakeResident(GLenum access) noexcept;
|
||||
|
||||
void MarkUsage(u64 offset, u64 size) {}
|
||||
|
||||
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
|
||||
|
||||
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
|
||||
|
@ -66,22 +68,29 @@ public:
|
|||
|
||||
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
|
||||
|
||||
bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier);
|
||||
|
||||
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier);
|
||||
|
||||
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload = false);
|
||||
|
||||
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool);
|
||||
|
||||
void PreCopyBarrier();
|
||||
void PostCopyBarrier();
|
||||
void Finish();
|
||||
|
||||
void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
|
||||
|
||||
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
|
||||
|
||||
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
|
||||
|
|
|
@ -559,7 +559,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
}
|
||||
|
||||
void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
|
||||
glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS);
|
||||
const GLenum buffer_mode =
|
||||
num_xfb_buffers_active == 1 ? GL_INTERLEAVED_ATTRIBS : GL_SEPARATE_ATTRIBS;
|
||||
glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), buffer_mode);
|
||||
}
|
||||
|
||||
void GraphicsPipeline::GenerateTransformFeedbackState() {
|
||||
|
@ -567,12 +569,14 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
|
|||
// when this is required.
|
||||
GLint* cursor{xfb_attribs.data()};
|
||||
|
||||
num_xfb_buffers_active = 0;
|
||||
for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
|
||||
const auto& layout = key.xfb_state.layouts[feedback];
|
||||
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
|
||||
if (layout.varying_count == 0) {
|
||||
continue;
|
||||
}
|
||||
num_xfb_buffers_active++;
|
||||
|
||||
const auto& locations = key.xfb_state.varyings[feedback];
|
||||
std::optional<u32> current_index;
|
||||
|
|
|
@ -154,6 +154,7 @@ private:
|
|||
|
||||
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
|
||||
GLsizei num_xfb_attribs{};
|
||||
u32 num_xfb_buffers_active{};
|
||||
std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
|
||||
|
||||
std::mutex built_mutex;
|
||||
|
|
|
@ -555,7 +555,7 @@ void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
|
|||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
|
|
@ -132,16 +132,12 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
const bool use_accelerated =
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
{
|
||||
std::scoped_lock lock{rasterizer.LockCaches()};
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
Frame* frame = present_manager.GetRenderFrame();
|
||||
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
|
||||
scheduler.Flush(*frame->render_ready);
|
||||
present_manager.Present(frame);
|
||||
}
|
||||
Frame* frame = present_manager.GetRenderFrame();
|
||||
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
|
||||
scheduler.Flush(*frame->render_ready);
|
||||
present_manager.Present(frame);
|
||||
|
||||
gpu.RendererFrameEndNotify();
|
||||
rasterizer.TickFrame();
|
||||
|
|
|
@ -137,6 +137,56 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
|
|||
|
||||
BlitScreen::~BlitScreen() = default;
|
||||
|
||||
static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer,
|
||||
const ScreenInfo& screen_info) {
|
||||
f32 left, top, right, bottom;
|
||||
|
||||
if (!framebuffer.crop_rect.IsEmpty()) {
|
||||
// If crop rectangle is not empty, apply properties from rectangle.
|
||||
left = static_cast<f32>(framebuffer.crop_rect.left);
|
||||
top = static_cast<f32>(framebuffer.crop_rect.top);
|
||||
right = static_cast<f32>(framebuffer.crop_rect.right);
|
||||
bottom = static_cast<f32>(framebuffer.crop_rect.bottom);
|
||||
} else {
|
||||
// Otherwise, fall back to framebuffer dimensions.
|
||||
left = 0;
|
||||
top = 0;
|
||||
right = static_cast<f32>(framebuffer.width);
|
||||
bottom = static_cast<f32>(framebuffer.height);
|
||||
}
|
||||
|
||||
// Apply transformation flags.
|
||||
auto framebuffer_transform_flags = framebuffer.transform_flags;
|
||||
|
||||
if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) {
|
||||
// Switch left and right.
|
||||
std::swap(left, right);
|
||||
}
|
||||
if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) {
|
||||
// Switch top and bottom.
|
||||
std::swap(top, bottom);
|
||||
}
|
||||
|
||||
framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH;
|
||||
framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV;
|
||||
if (True(framebuffer_transform_flags)) {
|
||||
UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
|
||||
static_cast<u32>(framebuffer_transform_flags));
|
||||
}
|
||||
|
||||
// Get the screen properties.
|
||||
const f32 screen_width = static_cast<f32>(screen_info.width);
|
||||
const f32 screen_height = static_cast<f32>(screen_info.height);
|
||||
|
||||
// Normalize coordinate space.
|
||||
left /= screen_width;
|
||||
top /= screen_height;
|
||||
right /= screen_width;
|
||||
bottom /= screen_height;
|
||||
|
||||
return Common::Rectangle<f32>(left, top, right, bottom);
|
||||
}
|
||||
|
||||
void BlitScreen::Recreate() {
|
||||
present_manager.WaitPresent();
|
||||
scheduler.Finish();
|
||||
|
@ -354,17 +404,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
|||
source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view);
|
||||
}
|
||||
if (fsr) {
|
||||
auto crop_rect = framebuffer.crop_rect;
|
||||
if (crop_rect.GetWidth() == 0) {
|
||||
crop_rect.right = framebuffer.width;
|
||||
}
|
||||
if (crop_rect.GetHeight() == 0) {
|
||||
crop_rect.bottom = framebuffer.height;
|
||||
}
|
||||
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
|
||||
VkExtent2D fsr_input_size{
|
||||
.width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
|
||||
.height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
|
||||
const auto crop_rect = NormalizeCrop(framebuffer, screen_info);
|
||||
const VkExtent2D fsr_input_size{
|
||||
.width = Settings::values.resolution_info.ScaleUp(screen_info.width),
|
||||
.height = Settings::values.resolution_info.ScaleUp(screen_info.height),
|
||||
};
|
||||
VkImageView fsr_image_view =
|
||||
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
|
||||
|
@ -1397,61 +1440,37 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou
|
|||
|
||||
void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout layout) const {
|
||||
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
|
||||
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
|
||||
f32 left, top, right, bottom;
|
||||
|
||||
static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
|
||||
auto left = texcoords.left;
|
||||
auto right = texcoords.right;
|
||||
if (fsr) {
|
||||
// FSR has already applied the crop, so we just want to render the image
|
||||
// it has produced.
|
||||
left = 0;
|
||||
top = 0;
|
||||
right = 1;
|
||||
bottom = 1;
|
||||
} else {
|
||||
// Get the normalized crop rectangle.
|
||||
const auto crop = NormalizeCrop(framebuffer, screen_info);
|
||||
|
||||
switch (framebuffer_transform_flags) {
|
||||
case Service::android::BufferTransformFlags::Unset:
|
||||
break;
|
||||
case Service::android::BufferTransformFlags::FlipV:
|
||||
// Flip the framebuffer vertically
|
||||
left = texcoords.right;
|
||||
right = texcoords.left;
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
|
||||
static_cast<u32>(framebuffer_transform_flags));
|
||||
break;
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
|
||||
|
||||
f32 left_start{};
|
||||
if (framebuffer_crop_rect.Top() > 0) {
|
||||
left_start = static_cast<f32>(framebuffer_crop_rect.Top()) /
|
||||
static_cast<f32>(framebuffer_crop_rect.Bottom());
|
||||
}
|
||||
f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
|
||||
f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
|
||||
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
|
||||
// (e.g. handheld mode) on a 1920x1080 framebuffer.
|
||||
if (!fsr) {
|
||||
if (framebuffer_crop_rect.GetWidth() > 0) {
|
||||
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
|
||||
static_cast<f32>(screen_info.width);
|
||||
}
|
||||
if (framebuffer_crop_rect.GetHeight() > 0) {
|
||||
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
|
||||
static_cast<f32>(screen_info.height);
|
||||
}
|
||||
// Apply the crop.
|
||||
left = crop.left;
|
||||
top = crop.top;
|
||||
right = crop.right;
|
||||
bottom = crop.bottom;
|
||||
}
|
||||
|
||||
// Map the coordinates to the screen.
|
||||
const auto& screen = layout.screen;
|
||||
const auto x = static_cast<f32>(screen.left);
|
||||
const auto y = static_cast<f32>(screen.top);
|
||||
const auto w = static_cast<f32>(screen.GetWidth());
|
||||
const auto h = static_cast<f32>(screen.GetHeight());
|
||||
data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v);
|
||||
data.vertices[1] =
|
||||
ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v);
|
||||
data.vertices[2] =
|
||||
ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v);
|
||||
data.vertices[3] =
|
||||
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v);
|
||||
|
||||
data.vertices[0] = ScreenRectVertex(x, y, left, top);
|
||||
data.vertices[1] = ScreenRectVertex(x + w, y, right, top);
|
||||
data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom);
|
||||
data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom);
|
||||
}
|
||||
|
||||
void BlitScreen::CreateSMAA(VkExtent2D smaa_size) {
|
||||
|
|
|
@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
|
|||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
|
||||
device{&runtime.device}, buffer{
|
||||
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
|
||||
device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
|
||||
tracker{SizeBytes()} {
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
|
@ -359,12 +359,31 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
|
|||
return static_cast<u32>(device.GetStorageBufferAlignment());
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
|
||||
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
|
||||
it->ResetUsageTracking();
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
if (Settings::values.disable_buffer_reorder) {
|
||||
return false;
|
||||
}
|
||||
const bool can_use_upload_cmdbuf =
|
||||
std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
|
||||
return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
|
||||
});
|
||||
return can_use_upload_cmdbuf;
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload) {
|
||||
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
|
||||
return;
|
||||
}
|
||||
|
@ -380,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
|||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
|
||||
scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
|
||||
vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
|
||||
upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||
if (barrier) {
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "video_core/buffer_cache/buffer_cache_base.h"
|
||||
#include "video_core/buffer_cache/memory_tracker_base.h"
|
||||
#include "video_core/buffer_cache/usage_tracker.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
|
@ -34,6 +35,18 @@ public:
|
|||
return *buffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
|
||||
return tracker.IsUsed(offset, size);
|
||||
}
|
||||
|
||||
void MarkUsage(u64 offset, u64 size) noexcept {
|
||||
tracker.Track(offset, size);
|
||||
}
|
||||
|
||||
void ResetUsageTracking() noexcept {
|
||||
tracker.Reset();
|
||||
}
|
||||
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
@ -49,6 +62,7 @@ private:
|
|||
const Device* device{};
|
||||
vk::Buffer buffer;
|
||||
std::vector<BufferView> views;
|
||||
VideoCommon::UsageTracker tracker;
|
||||
};
|
||||
|
||||
class QuadArrayIndexBuffer;
|
||||
|
@ -67,6 +81,8 @@ public:
|
|||
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
||||
DescriptorPool& descriptor_pool);
|
||||
|
||||
void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
|
||||
|
||||
void Finish();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
@ -81,12 +97,15 @@ public:
|
|||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
|
||||
|
||||
bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
|
||||
|
||||
void PreCopyBarrier();
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload = false);
|
||||
|
||||
void PostCopyBarrier();
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
|
|||
}
|
||||
|
||||
VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) {
|
||||
|
||||
UpdateDescriptorSet(image_index, image_view);
|
||||
|
||||
|
@ -61,15 +61,21 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag
|
|||
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
|
||||
|
||||
std::array<u32, 4 * 4> push_constants;
|
||||
FsrEasuConOffset(
|
||||
push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
|
||||
push_constants.data() + 12,
|
||||
const f32 input_image_width = static_cast<f32>(input_image_extent.width);
|
||||
const f32 input_image_height = static_cast<f32>(input_image_extent.height);
|
||||
const f32 output_image_width = static_cast<f32>(output_size.width);
|
||||
const f32 output_image_height = static_cast<f32>(output_size.height);
|
||||
const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
|
||||
const f32 viewport_x = crop_rect.left * input_image_width;
|
||||
const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
|
||||
const f32 viewport_y = crop_rect.top * input_image_height;
|
||||
|
||||
static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
|
||||
static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
|
||||
static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
|
||||
static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
|
||||
std::array<u32, 4 * 4> push_constants;
|
||||
FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4,
|
||||
push_constants.data() + 8, push_constants.data() + 12,
|
||||
|
||||
viewport_width, viewport_height, input_image_width, input_image_height,
|
||||
output_image_width, output_image_height, viewport_x, viewport_y);
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
|
||||
|
||||
{
|
||||
|
|
|
@ -17,7 +17,7 @@ public:
|
|||
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
|
||||
VkExtent2D output_size);
|
||||
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect);
|
||||
|
||||
private:
|
||||
void CreateDescriptorPool();
|
||||
|
|
|
@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
|
|||
Refresh();
|
||||
}
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick) {
|
||||
if (semaphore) {
|
||||
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
|
||||
host_tick);
|
||||
} else {
|
||||
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
|
|||
};
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
const VkSemaphore timeline_semaphore = *semaphore;
|
||||
|
@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
|||
const std::array signal_values{host_tick, u64(0)};
|
||||
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
|
||||
|
||||
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
|
||||
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
|
||||
const VkTimelineSemaphoreSubmitInfo timeline_si{
|
||||
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
|
||||
|
@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
|||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
|
||||
.pCommandBuffers = cmdbuffers.data(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = signal_semaphores.data(),
|
||||
};
|
||||
|
@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
|||
return device.GetGraphicsQueue().Submit(submit_info);
|
||||
}
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
|
||||
vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick) {
|
||||
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
|
||||
|
||||
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
|
||||
|
||||
const VkSubmitInfo submit_info{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
|
||||
.pCommandBuffers = cmdbuffers.data(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = &signal_semaphore,
|
||||
};
|
||||
|
|
|
@ -52,14 +52,16 @@ public:
|
|||
void Wait(u64 tick);
|
||||
|
||||
/// Submits the device graphics queue, updating the tick as necessary
|
||||
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
|
||||
|
||||
private:
|
||||
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick);
|
||||
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick);
|
||||
|
||||
void WaitThread(std::stop_token token);
|
||||
|
||||
|
|
|
@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
|
|||
info.y_negate = key.state.y_negate != 0;
|
||||
return info;
|
||||
}
|
||||
|
||||
size_t GetTotalPipelineWorkers() {
|
||||
const size_t max_core_threads =
|
||||
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
|
||||
#ifdef ANDROID
|
||||
// Leave at least a few cores free in android
|
||||
constexpr size_t free_cores = 3ULL;
|
||||
if (max_core_threads <= free_cores) {
|
||||
return 1ULL;
|
||||
}
|
||||
return max_core_threads - free_cores;
|
||||
#else
|
||||
return max_core_threads;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
size_t ComputePipelineCacheKey::Hash() const noexcept {
|
||||
|
@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
|||
texture_cache{texture_cache_}, shader_notify{shader_notify_},
|
||||
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
|
||||
use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
|
||||
#ifdef ANDROID
|
||||
workers(1, "VkPipelineBuilder"),
|
||||
#else
|
||||
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
|
||||
#endif
|
||||
workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(),
|
||||
"VkPipelineBuilder"),
|
||||
serialization_thread(1, "VkPipelineSerialization") {
|
||||
const auto& float_control{device.FloatControlProperties()};
|
||||
const VkDriverId driver_id{device.GetDriverID()};
|
||||
|
@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
|||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_native_ndc = device.IsExtDepthClipControlSupported(),
|
||||
.support_scaled_attributes = !device.MustEmulateScaledFormats(),
|
||||
.support_multi_viewport = device.SupportsMultiViewport(),
|
||||
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||
|
||||
|
|
|
@ -211,6 +211,13 @@ public:
|
|||
return;
|
||||
}
|
||||
PauseCounter();
|
||||
const auto driver_id = device.GetDriverID();
|
||||
if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
pending_sync.clear();
|
||||
sync_values_stash.clear();
|
||||
return;
|
||||
}
|
||||
sync_values_stash.clear();
|
||||
sync_values_stash.emplace_back();
|
||||
std::vector<HostSyncValues>* sync_values = &sync_values_stash.back();
|
||||
|
@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
|||
return true;
|
||||
}
|
||||
|
||||
auto driver_id = impl->device.GetDriverID();
|
||||
if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
|
||||
}
|
||||
|
|
|
@ -82,7 +82,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
|
|||
}
|
||||
|
||||
if (y_negate) {
|
||||
y += height;
|
||||
y += conv(static_cast<f32>(regs.surface_clip.height));
|
||||
height = -height;
|
||||
}
|
||||
|
||||
|
@ -199,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
|||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
// update engine as channel may be different.
|
||||
pipeline->SetEngine(maxwell3d, gpu_memory);
|
||||
pipeline->Configure(is_indexed);
|
||||
|
@ -621,7 +621,7 @@ void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
|
|||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
@ -710,7 +710,6 @@ void RasterizerVulkan::TiledCacheBarrier() {
|
|||
}
|
||||
|
||||
void RasterizerVulkan::FlushCommands() {
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
if (draw_counter == 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -808,7 +807,6 @@ void RasterizerVulkan::FlushWork() {
|
|||
if ((++draw_counter & 7) != 7) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
if (draw_counter < DRAWS_TO_DISPATCH) {
|
||||
// Send recorded tasks to the worker thread
|
||||
scheduler.DispatchWork();
|
||||
|
@ -923,9 +921,13 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
|||
}
|
||||
|
||||
void RasterizerVulkan::HandleTransformFeedback() {
|
||||
static std::once_flag warn_unsupported;
|
||||
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
std::call_once(warn_unsupported, [&] {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
});
|
||||
return;
|
||||
}
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
|
||||
|
@ -1503,7 +1505,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
|
|||
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
|
||||
CreateChannel(channel);
|
||||
{
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.CreateChannel(channel);
|
||||
buffer_cache.CreateChannel(channel);
|
||||
}
|
||||
|
@ -1516,7 +1518,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
|
|||
const s32 channel_id = channel.bind_id;
|
||||
BindToChannel(channel_id);
|
||||
{
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.BindToChannel(channel_id);
|
||||
buffer_cache.BindToChannel(channel_id);
|
||||
}
|
||||
|
@ -1529,7 +1531,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
|
|||
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
|
||||
EraseChannel(channel_id);
|
||||
{
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.EraseChannel(channel_id);
|
||||
buffer_cache.EraseChannel(channel_id);
|
||||
}
|
||||
|
|
|
@ -133,10 +133,6 @@ public:
|
|||
|
||||
void ReleaseChannel(s32 channel_id) override;
|
||||
|
||||
std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() {
|
||||
return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
|
|
|
@ -22,11 +22,12 @@ namespace Vulkan {
|
|||
|
||||
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
|
||||
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
|
||||
vk::CommandBuffer upload_cmdbuf) {
|
||||
auto command = first;
|
||||
while (command != nullptr) {
|
||||
auto next = command->GetNext();
|
||||
command->Execute(cmdbuf);
|
||||
command->Execute(cmdbuf, upload_cmdbuf);
|
||||
command->~Command();
|
||||
command = next;
|
||||
}
|
||||
|
@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
|
|||
// Perform the work, tracking whether the chunk was a submission
|
||||
// before executing.
|
||||
const bool has_submit = work->HasSubmit();
|
||||
work->ExecuteAll(current_cmdbuf);
|
||||
work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
|
||||
|
||||
// If the chunk was a submission, reallocate the command buffer.
|
||||
if (has_submit) {
|
||||
|
@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
|
|||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
|
||||
current_upload_cmdbuf.Begin({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
}
|
||||
|
||||
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
|
@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
|||
InvalidateState();
|
||||
|
||||
const u64 signal_value = master_semaphore->NextTick();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
|
||||
RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
|
||||
this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
|
||||
upload_cmdbuf.End();
|
||||
cmdbuf.End();
|
||||
|
||||
if (on_submit) {
|
||||
|
@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
|||
|
||||
std::scoped_lock lock{submit_mutex};
|
||||
switch (const VkResult result = master_semaphore->SubmitQueue(
|
||||
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
|
||||
cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
|
|
|
@ -80,7 +80,8 @@ public:
|
|||
|
||||
/// Send work to a separate thread.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
|
||||
void RecordWithUploadBuffer(T&& command) {
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
|
@ -88,6 +89,15 @@ public:
|
|||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_invocable_v<T, vk::CommandBuffer>
|
||||
void Record(T&& c) {
|
||||
this->RecordWithUploadBuffer(
|
||||
[command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
|
||||
command(cmdbuf);
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
|
@ -119,7 +129,7 @@ private:
|
|||
public:
|
||||
virtual ~Command() = default;
|
||||
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
|
||||
|
||||
Command* GetNext() const {
|
||||
return next;
|
||||
|
@ -142,8 +152,8 @@ private:
|
|||
TypedCommand(TypedCommand&&) = delete;
|
||||
TypedCommand& operator=(TypedCommand&&) = delete;
|
||||
|
||||
void Execute(vk::CommandBuffer cmdbuf) const override {
|
||||
command(cmdbuf);
|
||||
void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
|
||||
command(cmdbuf, upload_cmdbuf);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -152,7 +162,7 @@ private:
|
|||
|
||||
class CommandChunk final {
|
||||
public:
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
|
||||
|
||||
template <typename T>
|
||||
bool Record(T& command) {
|
||||
|
@ -228,6 +238,7 @@ private:
|
|||
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
|
||||
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
vk::CommandBuffer current_upload_cmdbuf;
|
||||
|
||||
std::unique_ptr<CommandChunk> chunk;
|
||||
std::function<void()> on_submit;
|
||||
|
|
|
@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
|
|||
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
|
||||
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
|
||||
|
||||
scheduler.Record([&](vk::CommandBuffer& cmdbuf) {
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
for (auto& images : m_dynamic_images) {
|
||||
for (size_t i = 0; i < MaxDynamicImage; i++) {
|
||||
ClearColorImage(cmdbuf, *images.images[i]);
|
||||
|
@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
|
|||
UpdateDescriptorSets(source_image_view, image_index);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) {
|
||||
scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
|
||||
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
|
||||
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
|
||||
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
|
||||
|
|
|
@ -36,6 +36,10 @@ public:
|
|||
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
|
||||
void FreeDeferred(StagingBufferRef& ref);
|
||||
|
||||
[[nodiscard]] VkBuffer StreamBuf() const noexcept {
|
||||
return *stream_buffer;
|
||||
}
|
||||
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
|
|
|
@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
|||
: VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
|
||||
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
|
||||
|
||||
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
|
||||
: VideoCommon::ImageViewBase{params} {}
|
||||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
|
||||
: VideoCommon::ImageViewBase{params}, device{&runtime.device} {
|
||||
if (device->HasNullDescriptor()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle fallback for devices without nullDescriptor
|
||||
ImageInfo info{};
|
||||
info.format = PixelFormat::A8B8G8R8_UNORM;
|
||||
|
||||
null_image = MakeImage(*device, runtime.memory_allocator, info, {});
|
||||
image_handle = *null_image;
|
||||
for (u32 i = 0; i < Shader::NUM_TEXTURE_TYPES; i++) {
|
||||
image_views[i] = MakeView(VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
}
|
||||
}
|
||||
|
||||
ImageView::~ImageView() = default;
|
||||
|
||||
|
|
|
@ -267,6 +267,7 @@ private:
|
|||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
vk::ImageView color_view;
|
||||
vk::Image null_image;
|
||||
VkImage image_handle = VK_NULL_HANDLE;
|
||||
VkImageView render_target = VK_NULL_HANDLE;
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
|
|
|
@ -138,6 +138,10 @@ public:
|
|||
return Iterator(this, SlotId{SlotId::INVALID_INDEX});
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t size() const noexcept {
|
||||
return values_capacity - free_list.size();
|
||||
}
|
||||
|
||||
private:
|
||||
struct NonTrivialDummy {
|
||||
NonTrivialDummy() noexcept {}
|
||||
|
|
|
@ -635,6 +635,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||
has_broken_cube_compatibility = true;
|
||||
}
|
||||
}
|
||||
if (is_qualcomm) {
|
||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
|
||||
has_broken_parallel_compiling = true;
|
||||
}
|
||||
}
|
||||
if (extensions.sampler_filter_minmax && is_amd) {
|
||||
// Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
|
||||
if (!features.shader_float16_int8.shaderFloat16) {
|
||||
|
@ -863,7 +869,8 @@ bool Device::ShouldBoostClocks() const {
|
|||
driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA ||
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP;
|
||||
|
||||
const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
|
||||
const bool is_steam_deck = (vendor_id == 0x1002 && device_id == 0x163F) ||
|
||||
(vendor_id == 0x1002 && device_id == 0x1435);
|
||||
|
||||
const bool is_debugging = this->HasDebuggingToolAttached();
|
||||
|
||||
|
|
|
@ -102,6 +102,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
|||
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
|
||||
EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
|
||||
|
@ -599,6 +600,11 @@ public:
|
|||
return has_broken_cube_compatibility;
|
||||
}
|
||||
|
||||
/// Returns true if parallel shader compiling has issues with the current driver.
|
||||
bool HasBrokenParallelShaderCompiling() const {
|
||||
return has_broken_parallel_compiling;
|
||||
}
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return properties.driver.driverName;
|
||||
|
@ -663,6 +669,10 @@ public:
|
|||
return supports_conditional_barriers;
|
||||
}
|
||||
|
||||
bool SupportsMultiViewport() const {
|
||||
return features2.features.multiViewport;
|
||||
}
|
||||
|
||||
[[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
|
||||
u32 driver_version) {
|
||||
if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
||||
|
@ -794,6 +804,7 @@ private:
|
|||
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
|
||||
bool has_broken_compute{}; ///< Compute shaders can cause crashes
|
||||
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
|
||||
bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling.
|
||||
bool has_renderdoc{}; ///< Has RenderDoc attached
|
||||
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
|
||||
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
|
||||
|
|
|
@ -1101,6 +1101,10 @@ public:
|
|||
return &handle;
|
||||
}
|
||||
|
||||
VkCommandBuffer operator*() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
void Begin(const VkCommandBufferBeginInfo& begin_info) const {
|
||||
Check(dld->vkBeginCommandBuffer(handle, &begin_info));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue