texture_cache: Simplify image view queries and blacklisting

ReinUsesLisp 2021-07-28 02:47:06 -03:00 committed by Fernando Sahmkow
parent 48d81506a3
commit 56ccda1d99
16 changed files with 192 additions and 192 deletions
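Note: every hunk below replaces the parallel image_view_indices / image_view_ids / image_view_blacklist arrays with a single VideoCommon::ImageViewInOut record per binding. Its declaration is not part of the hunks on this page; judging from the designated initializers used in the compute-pipeline hunk, it presumably looks like the following sketch (field comments are inferred, not quoted from the source):

struct ImageViewInOut {
    u32 index{};      // in: texture handle index gathered by the pipeline
    bool blacklist{}; // in: request to blacklist written 2D color images
    ImageViewId id{}; // out: resolved by FillComputeImageViews/FillGraphicsImageViews
};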

View file

@@ -156,28 +156,27 @@ private:
     u32 texture_bit{1u};
 };
 
-inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
-                                 const ImageId*& image_view_ids, TextureCache& texture_cache,
+inline void PushImageDescriptors(TextureCache& texture_cache,
                                  VKUpdateDescriptorQueue& update_descriptor_queue,
-                                 RescalingPushConstant& rescaling) {
-    static constexpr VideoCommon::ImageViewId NULL_IMAGE_VIEW_ID{0};
-    image_view_ids += Shader::NumDescriptors(info.texture_buffer_descriptors);
-    image_view_ids += Shader::NumDescriptors(info.image_buffer_descriptors);
+                                 const Shader::Info& info, RescalingPushConstant& rescaling,
+                                 const VkSampler*& samplers,
+                                 const VideoCommon::ImageViewInOut*& views) {
+    views += Shader::NumDescriptors(info.texture_buffer_descriptors);
+    views += Shader::NumDescriptors(info.image_buffer_descriptors);
     for (const auto& desc : info.texture_descriptors) {
        for (u32 index = 0; index < desc.count; ++index) {
-            const VideoCommon::ImageViewId image_view_id{*(image_view_ids++)};
+            const VideoCommon::ImageViewId image_view_id{(views++)->id};
             const VkSampler sampler{*(samplers++)};
             ImageView& image_view{texture_cache.GetImageView(image_view_id)};
             const Image& image{texture_cache.GetImage(image_view.image_id)};
             const VkImageView vk_image_view{image_view.Handle(desc.type)};
             update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
-            rescaling.PushTexture(image_view_id != NULL_IMAGE_VIEW_ID &&
-                                  True(image.flags & VideoCommon::ImageFlagBits::Rescaled));
+            rescaling.PushTexture(True(image.flags & VideoCommon::ImageFlagBits::Rescaled));
         }
     }
     for (const auto& desc : info.image_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
-            ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+            ImageView& image_view{texture_cache.GetImageView((views++)->id)};
             if (desc.is_written) {
                 texture_cache.MarkModification(image_view.image_id);
             }

View file

@@ -108,10 +108,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
     texture_cache.SynchronizeComputeDescriptors();
 
     static constexpr size_t max_elements = 64;
-    std::array<ImageId, max_elements> image_view_ids;
-    boost::container::static_vector<u32, max_elements> image_view_indices;
+    boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
     boost::container::static_vector<VkSampler, max_elements> samplers;
-    boost::container::static_vector<bool, max_elements> image_view_blacklist;
 
     const auto& qmd{kepler_compute.launch_description};
     const auto& cbufs{qmd.const_buffer_config};
@@ -135,54 +133,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
         }
         return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
     }};
-    const auto add_image{[&](const auto& desc) {
+    const auto add_image{[&](const auto& desc, bool blacklist) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({
+                .index = handle.first,
+                .blacklist = blacklist,
+                .id = {},
+            });
         }
     }};
-    std::ranges::for_each(info.texture_buffer_descriptors, add_image);
-    std::ranges::for_each(info.image_buffer_descriptors, add_image);
+    for (const auto& desc : info.texture_buffer_descriptors) {
+        add_image(desc, false);
+    }
+    for (const auto& desc : info.image_buffer_descriptors) {
+        add_image(desc, false);
+    }
     for (const auto& desc : info.texture_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({handle.first});
             Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
             samplers.push_back(sampler->Handle());
         }
     }
-    const u32 black_list_base = image_view_indices.size();
-    bool atleast_one_blacklisted = false;
     for (const auto& desc : info.image_descriptors) {
-        const bool is_black_listed =
-            desc.is_written && (desc.type == Shader::TextureType::Color2D ||
-                                desc.type == Shader::TextureType::ColorArray2D);
-        for (u32 index = 0; index < desc.count; ++index) {
-            image_view_blacklist.push_back(is_black_listed);
-        }
-        atleast_one_blacklisted |= is_black_listed;
-        add_image(desc);
+        add_image(desc, true);
     }
-    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
-    bool has_listed_stuffs;
-    do {
-        has_listed_stuffs = false;
-        texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-        if (atleast_one_blacklisted) {
-            for (u32 index = 0; index < image_view_blacklist.size(); index++) {
-                if (image_view_blacklist[index]) {
-                    ImageView& image_view{
-                        texture_cache.GetImageView(image_view_ids[index + black_list_base])};
-                    has_listed_stuffs |= texture_cache.BlackListImage(image_view.image_id);
-                }
-            }
-        }
-    } while (has_listed_stuffs);
+    texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
 
     buffer_cache.UnbindComputeTextureBuffers();
-    ImageId* texture_buffer_ids{image_view_ids.data()};
     size_t index{};
     const auto add_buffer{[&](const auto& desc) {
         constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
@@ -191,11 +172,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
             if constexpr (is_image) {
                 is_written = desc.is_written;
             }
-            ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
+            ImageView& image_view = texture_cache.GetImageView(views[index].id);
             buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
                                                   image_view.BufferSize(), image_view.format,
                                                   is_written, is_image);
-            ++texture_buffer_ids;
             ++index;
         }
     }};
@@ -207,9 +187,9 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
     RescalingPushConstant rescaling(num_textures);
     const VkSampler* samplers_it{samplers.data()};
-    const ImageId* views_it{image_view_ids.data()};
-    PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue,
-                         rescaling);
+    const VideoCommon::ImageViewInOut* views_it{views.data()};
+    PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
+                         views_it);
 
     if (!is_built.load(std::memory_order::relaxed)) {
         // Wait for the pipeline to be built
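For orientation: both pipelines used to run a do { ... } while (has_listed_stuffs) retry loop after filling the image view ids, re-querying the cache whenever a written 2D color image had to be blacklisted. With the blacklist request carried inside each ImageViewInOut, that retry can happen where the information already lives. A minimal sketch of the idea, assuming a cache-side resolver (the texture-cache half of this commit is not among the hunks shown, and ResolveIndex is a hypothetical stand-in):

void FillViews(TextureCache& texture_cache, std::span<VideoCommon::ImageViewInOut> views) {
    bool relisted; // set when blacklisting invalidates ids already handed out
    do {
        relisted = false;
        for (VideoCommon::ImageViewInOut& view : views) {
            view.id = texture_cache.ResolveIndex(view.index); // hypothetical helper
            if (view.blacklist) {
                ImageView& image_view{texture_cache.GetImageView(view.id)};
                relisted |= texture_cache.BlackListImage(image_view.image_id);
            }
        }
    } while (relisted);
}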

View file

@@ -278,12 +278,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
 
 template <typename Spec>
 void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
-    std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
-    std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
-    std::array<bool, MAX_IMAGE_ELEMENTS> image_view_blacklist;
+    std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
     std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
     size_t sampler_index{};
-    size_t image_index{};
+    size_t view_index{};
 
     texture_cache.SynchronizeGraphicsDescriptors();
@@ -291,8 +289,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
 
     const auto& regs{maxwell3d.regs};
     const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
-    u32 start_black_list = std::numeric_limits<u32>::max();
-    u32 end_black_list = 0;
     const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
         const Shader::Info& info{stage_infos[stage]};
         buffer_cache.UnbindGraphicsStorageBuffers(stage);
@@ -329,7 +325,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
         const auto add_image{[&](const auto& desc) {
             for (u32 index = 0; index < desc.count; ++index) {
                 const auto handle{read_handle(desc, index)};
-                image_view_indices[image_index++] = handle.first;
+                views[view_index++] = {handle.first};
             }
         }};
         if constexpr (Spec::has_texture_buffers) {
@@ -345,7 +341,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
         for (const auto& desc : info.texture_descriptors) {
             for (u32 index = 0; index < desc.count; ++index) {
                 const auto handle{read_handle(desc, index)};
-                image_view_indices[image_index++] = handle.first;
+                views[view_index++] = {handle.first};
                 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
                 samplers[sampler_index++] = sampler->Handle();
@@ -353,15 +349,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
         }
         if constexpr (Spec::has_images) {
             for (const auto& desc : info.image_descriptors) {
-                if (desc.is_written && (desc.type == Shader::TextureType::Color2D ||
-                                        desc.type == Shader::TextureType::ColorArray2D)) {
-                    auto index_copy = image_index;
-                    for (u32 index = 0; index < desc.count; ++index) {
-                        start_black_list = std::min<u32>(start_black_list, index_copy);
-                        image_view_blacklist[index_copy++] = true;
-                        end_black_list = std::max<u32>(end_black_list, index_copy);
-                    }
-                }
                 add_image(desc);
             }
         }
@@ -381,24 +368,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
     if constexpr (Spec::enabled_stages[4]) {
         config_stage(4);
     }
-    const std::span indices_span(image_view_indices.data(), image_index);
-    bool has_listed_stuffs;
-    do {
-        has_listed_stuffs = false;
-        texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-        if constexpr (Spec::has_images) {
-            if (start_black_list < end_black_list) {
-                for (u32 index = start_black_list; index < end_black_list; index++) {
-                    if (image_view_blacklist[index]) {
-                        ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
-                        has_listed_stuffs |= texture_cache.BlackListImage(image_view.image_id);
-                    }
-                }
-            }
-        }
-    } while (has_listed_stuffs);
+    texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index));
 
-    ImageId* texture_buffer_index{image_view_ids.data()};
+    VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
     const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
         size_t index{};
         const auto add_buffer{[&](const auto& desc) {
@@ -408,12 +380,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
             if constexpr (is_image) {
                 is_written = desc.is_written;
             }
-            ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+            ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
             buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
                                                    image_view.BufferSize(), image_view.format,
                                                    is_written, is_image);
             ++index;
-            ++texture_buffer_index;
+            ++texture_buffer_it;
         }
     }};
     buffer_cache.UnbindGraphicsTextureBuffers(stage);
@@ -429,9 +401,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
                 add_buffer(desc);
             }
         }
-        texture_buffer_index += Shader::NumDescriptors(info.texture_descriptors);
+        texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
         if constexpr (Spec::has_images) {
-            texture_buffer_index += Shader::NumDescriptors(info.image_descriptors);
+            texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
         }
     }};
     if constexpr (Spec::enabled_stages[0]) {
@@ -457,11 +429,11 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
 
     RescalingPushConstant rescaling(num_textures);
     const VkSampler* samplers_it{samplers.data()};
-    const ImageId* views_it{image_view_ids.data()};
+    const VideoCommon::ImageViewInOut* views_it{views.data()};
     const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
         buffer_cache.BindHostStageBuffers(stage);
-        PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
-                             update_descriptor_queue, rescaling);
+        PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
+                             samplers_it, views_it);
     }};
     if constexpr (Spec::enabled_stages[0]) {
         prepare_stage(0);

View file

@@ -730,10 +730,17 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con
     }
 }
 
 } // Anonymous namespace
 
-void TextureCacheRuntime::Init() {
-    resolution = Settings::values.resolution_info;
-    is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0;
-}
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+                                         MemoryAllocator& memory_allocator_,
+                                         StagingBufferPool& staging_buffer_pool_,
+                                         BlitImageHelper& blit_image_helper_,
+                                         ASTCDecoderPass& astc_decoder_pass_,
+                                         RenderPassCache& render_pass_cache_)
+    : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
+      staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
+      astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_},
+      resolution{Settings::values.resolution_info},
+      is_rescaling_on(resolution.up_scale != 1 || resolution.down_shift != 0) {}
 
 void TextureCacheRuntime::Finish() {
     scheduler.Finish();
@@ -1040,6 +1047,8 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
     }
 }
 
+Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
+
 Image::~Image() = default;
 
 void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
@@ -1187,8 +1196,7 @@ bool Image::ScaleDown(bool save_as_backup) {
     }*/
 
     const auto& resolution = runtime->resolution;
-    vk::Image downscaled_image =
-        MakeImage(runtime->device, info);
+    vk::Image downscaled_image = MakeImage(runtime->device, info);
     MemoryCommit new_commit(
         runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal));
@@ -1301,7 +1309,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
     : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
       buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
 
-ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
     : VideoCommon::ImageViewBase{params} {}
 
 VkImageView ImageView::DepthView() {

View file

@@ -34,21 +34,16 @@ class RenderPassCache;
 class StagingBufferPool;
 class VKScheduler;
 
-struct TextureCacheRuntime {
-    const Device& device;
-    VKScheduler& scheduler;
-    MemoryAllocator& memory_allocator;
-    StagingBufferPool& staging_buffer_pool;
-    BlitImageHelper& blit_image_helper;
-    ASTCDecoderPass& astc_decoder_pass;
-    RenderPassCache& render_pass_cache;
+class TextureCacheRuntime {
+public:
     static constexpr size_t TICKS_TO_DESTROY = 6;
-    DelayedDestructionRing<vk::Image, TICKS_TO_DESTROY> prescaled_images;
-    DelayedDestructionRing<MemoryCommit, TICKS_TO_DESTROY> prescaled_commits;
-    Settings::ResolutionScalingInfo resolution;
-    bool is_rescaling_on{};
 
-    void Init();
+    explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+                                 MemoryAllocator& memory_allocator_,
+                                 StagingBufferPool& staging_buffer_pool_,
+                                 BlitImageHelper& blit_image_helper_,
+                                 ASTCDecoderPass& astc_decoder_pass_,
+                                 RenderPassCache& render_pass_cache_);
 
     void Finish();
@@ -56,6 +51,10 @@ struct TextureCacheRuntime {
     StagingBufferRef DownloadStagingBuffer(size_t size);
 
+    void TickFrame();
+
+    u64 GetDeviceLocalMemory() const;
+
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                    const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
@@ -84,15 +83,25 @@ struct TextureCacheRuntime {
         return true;
     }
 
-    void TickFrame();
+    const Device& device;
+    VKScheduler& scheduler;
+    MemoryAllocator& memory_allocator;
+    StagingBufferPool& staging_buffer_pool;
+    BlitImageHelper& blit_image_helper;
+    ASTCDecoderPass& astc_decoder_pass;
+    RenderPassCache& render_pass_cache;
 
-    u64 GetDeviceLocalMemory() const;
+    DelayedDestructionRing<vk::Image, TICKS_TO_DESTROY> prescaled_images;
+    DelayedDestructionRing<MemoryCommit, TICKS_TO_DESTROY> prescaled_commits;
+    Settings::ResolutionScalingInfo resolution;
+    bool is_rescaling_on{};
 };
 
 class Image : public VideoCommon::ImageBase {
 public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);
+    explicit Image(const VideoCommon::NullImageParams&);
 
     ~Image();
@@ -151,7 +160,7 @@ public:
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
                        const VideoCommon::ImageViewInfo&, GPUVAddr);
-    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
 
     [[nodiscard]] VkImageView DepthView();
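Finally, the header hunks above turn TextureCacheRuntime from an aggregate struct with a separate Init() step into a class that receives its collaborators through the constructor and derives the rescaling state in the member initializer list. A hypothetical call site would now look like this (variable names are placeholders; only the constructor signature comes from the diff):

// No separate runtime.Init() call is needed anymore; resolution and
// is_rescaling_on are set during construction.
TextureCacheRuntime runtime(device, scheduler, memory_allocator, staging_buffer_pool,
                            blit_image_helper, astc_decoder_pass, render_pass_cache);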