shader_recompiler: Implement most integer image atomics, workgroup barriers, and shared memory load/store (#231)

* shader_recompiler: Add LDEXP (semantics sketched in the note after this list)

* shader_recompiler: Add most image integer atomic ops

* shader_recompiler: Implement shared memory load/store

* shader_recompiler: More image atomics

* externals: Update sirit

* clang-format

* cmake: Add missing files

* shader_recompiler: Fix some atomic bugs

* shader_recompiler: Build VS outputs from the VS output control register

* shader_recompiler: Treat shared memory accesses as having side effects; fix format component order

* shader_recompiler: Implement inline constant buffer

* video_core: Fix regressions

* Work

* Fixup a few things
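A note on the LDEXP op added above: it assembles a float from a significand and an integer exponent, i.e. result = x * 2^exp, the same semantics as C++ `std::ldexp`. A minimal sketch of the behavior the translator has to reproduce:

```cpp
#include <cassert>
#include <cmath>

int main() {
    // LDEXP semantics: result = significand * 2^exponent.
    assert(std::ldexp(1.5f, 4) == 24.0f); // 1.5 * 2^4
    assert(std::ldexp(3.0f, -1) == 1.5f); // 3.0 * 2^-1
}
```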
TheTurtle 2024-07-05 00:15:44 +03:00 committed by GitHub
parent af3bbc33e9
commit 6ceab6dfac
69 changed files with 1597 additions and 310 deletions

View file

@@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break;
}
+case PM4ItOpcode::IndexBufferSize: {
+const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
+regs.num_indices = index_size->num_indices;
+break;
+}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;

View file

@@ -85,14 +85,14 @@ struct Liverpool {
} settings;
UserData user_data;
-template <typename T = u8>
-const T* Address() const {
+template <typename T = u8*>
+const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
-return reinterpret_cast<const T*>(addr);
+return reinterpret_cast<const T>(addr);
}
std::span<const u32> Code() const {
-const u32* code = Address<u32>();
+const u32* code = Address<u32*>();
BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
@@ -121,20 +121,26 @@ struct Liverpool {
BitField<0, 6, u64> num_vgprs;
BitField<6, 4, u64> num_sgprs;
BitField<33, 5, u64> num_user_regs;
+BitField<47, 9, u64> lds_dwords;
} settings;
INSERT_PADDING_WORDS(1);
u32 resource_limits;
INSERT_PADDING_WORDS(0x2A);
UserData user_data;
-template <typename T = u8>
-const T* Address() const {
+template <typename T = u8*>
+const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
-return reinterpret_cast<const T*>(addr);
+return reinterpret_cast<const T>(addr);
}
+u32 SharedMemSize() const noexcept {
+// lds_dwords is in units of 128 dwords. We return bytes.
+return settings.lds_dwords.Value() * 128 * 4;
+}
std::span<const u32> Code() const {
-const u32* code = Address<u32>();
+const u32* code = Address<u32*>();
BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
@@ -144,7 +150,7 @@ struct Liverpool {
template <typename Shader>
static constexpr auto* GetBinaryInfo(const Shader& sh) {
-const auto* code = sh.template Address<u32>();
+const auto* code = sh.template Address<u32*>();
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
return bininfo;
@@ -208,6 +214,10 @@ struct Liverpool {
BitField<18, 1, u32> use_vtx_render_target_idx;
BitField<19, 1, u32> use_vtx_viewport_idx;
BitField<20, 1, u32> use_vtx_kill_flag;
+BitField<21, 1, u32> vs_out_misc_enable;
+BitField<22, 1, u32> vs_out_ccdist0_enable;
+BitField<23, 1, u32> vs_out_ccdist1_enable;
+BitField<25, 1, u32> use_vtx_gs_cut_flag;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable.Value() >> index) & 1;
@@ -469,7 +479,7 @@ struct Liverpool {
template <typename T = VAddr>
T Address() const {
-return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
+return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};
@@ -1021,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
+static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
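As a sanity check of the SharedMemSize conversion above (lds_dwords counts blocks of 128 dwords, and a dword is 4 bytes, so bytes = value * 128 * 4):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the new Liverpool::SharedMemSize: lds_dwords is in units of
// 128 dwords; the result is in bytes.
constexpr std::uint32_t SharedMemBytes(std::uint32_t lds_dwords) {
    return lds_dwords * 128 * 4;
}

int main() {
    assert(SharedMemBytes(1) == 512);  // one 128-dword block
    assert(SharedMemBytes(8) == 4096); // 4 KiB of LDS
}
```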

View file

@@ -549,8 +549,8 @@ struct PM4DumpConstRam {
u32 addr_hi;
template <typename T>
-T* Address() const {
-return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
+T Address() const {
+return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
}
[[nodiscard]] u32 Offset() const {
@@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
u32 addr_hi;
};
+struct PM4CmdDrawIndexBufferSize {
+PM4Type3Header header;
+u32 num_indices;
+};
struct PM4CmdIndirectBuffer {
PM4Type3Header header;
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
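The new PM4CmdDrawIndexBufferSize packet is consumed like the other type-3 packets: the command processor reinterprets the dword stream in place. A self-contained sketch of the pattern (the header struct here is a simplified stand-in, not the real PM4Type3Header bitfield):

```cpp
#include <cstdint>

struct PM4Type3Header { // simplified stand-in for the real bitfield header
    std::uint32_t raw;
};

struct PM4CmdDrawIndexBufferSize {
    PM4Type3Header header;
    std::uint32_t num_indices;
};

// Mirrors the IndexBufferSize case in ProcessGraphics: the packet is read
// directly out of the command buffer without copying.
std::uint32_t ReadNumIndices(const std::uint32_t* stream) {
    const auto* pkt = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(stream);
    return pkt->num_indices;
}
```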

View file

@@ -21,32 +21,45 @@ enum class CompSwizzle : u32 {
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer {
-union {
-BitField<0, 44, u64> base_address;
-BitField<48, 14, u64> stride;
-BitField<62, 1, u64> cache_swizzle;
-BitField<63, 1, u64> swizzle_enable;
-};
+u64 base_address : 44;
+u64 : 4;
+u64 stride : 14;
+u64 cache_swizzle : 1;
+u64 swizzle_enable : 1;
u32 num_records;
-union {
-BitField<0, 3, u32> dst_sel_x;
-BitField<3, 3, u32> dst_sel_y;
-BitField<6, 3, u32> dst_sel_z;
-BitField<9, 3, u32> dst_sel_w;
-BitField<0, 12, u32> dst_sel;
-BitField<12, 3, NumberFormat> num_format;
-BitField<15, 4, DataFormat> data_format;
-BitField<19, 2, u32> element_size;
-BitField<21, 2, u32> index_stride;
-BitField<23, 1, u32> add_tid_enable;
-};
+u32 dst_sel_x : 3;
+u32 dst_sel_y : 3;
+u32 dst_sel_z : 3;
+u32 dst_sel_w : 3;
+u32 num_format : 3;
+u32 data_format : 4;
+u32 element_size : 2;
+u32 index_stride : 2;
+u32 add_tid_enable : 1;
+operator bool() const noexcept {
+return base_address != 0;
+}
+bool operator==(const Buffer& other) const noexcept {
+return std::memcmp(this, &other, sizeof(Buffer)) == 0;
+}
CompSwizzle GetSwizzle(u32 comp) const noexcept {
-return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
+const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
+return static_cast<CompSwizzle>(select[comp]);
}
+NumberFormat GetNumberFmt() const noexcept {
+return static_cast<NumberFormat>(num_format);
+}
+DataFormat GetDataFmt() const noexcept {
+return static_cast<DataFormat>(data_format);
+}
u32 GetStride() const noexcept {
-return stride == 0 ? 1U : stride.Value();
+return stride == 0 ? 1U : stride;
}
u32 GetStrideElements(u32 element_size) const noexcept {
@@ -61,6 +74,7 @@ struct Buffer {
return GetStride() * num_records;
}
};
+static_assert(sizeof(Buffer) == 16); // 128bits
enum class ImageType : u64 {
Buffer = 0,
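With the V# expressed as plain bit-fields, GetSwizzle becomes a per-component array lookup instead of a shift out of a packed dst_sel field. A self-contained sketch of the idea (the DST_SEL encoding 0=zero, 1=one, 4..7=R,G,B,A follows the Sea Islands ISA; the type here is a local stand-in):

```cpp
#include <array>
#include <cassert>
#include <cstdint>

// Local stand-in for the descriptor's four 3-bit dst_sel fields.
struct DstSel {
    std::uint32_t x : 3, y : 3, z : 3, w : 3;
    std::uint32_t Get(std::uint32_t comp) const {
        const std::array<std::uint32_t, 4> select{x, y, z, w};
        return select[comp];
    }
};

int main() {
    const DstSel sel{.x = 4, .y = 5, .z = 6, .w = 7}; // identity swizzle: R,G,B,A
    assert(sel.Get(2) == 6);                          // component 2 selects B
}
```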

View file

@@ -392,6 +392,36 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16Sfloat;
}
+if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
+num_format == AmdGpu::NumberFormat::Snorm) {
+return vk::Format::eR16G16B16A16Snorm;
+}
+if (data_format == AmdGpu::DataFormat::Format32_32 &&
+num_format == AmdGpu::NumberFormat::Uint) {
+return vk::Format::eR32G32Uint;
+}
+if (data_format == AmdGpu::DataFormat::Format4_4_4_4 &&
+num_format == AmdGpu::NumberFormat::Unorm) {
+return vk::Format::eR4G4B4A4UnormPack16;
+}
+if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
+num_format == AmdGpu::NumberFormat::Uint) {
+return vk::Format::eR16G16B16A16Uint;
+}
+if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
+num_format == AmdGpu::NumberFormat::Uint) {
+return vk::Format::eR32G32B32A32Uint;
+}
+if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) {
+return vk::Format::eR8Sint;
+}
+if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Srgb) {
+return vk::Format::eBc1RgbaSrgbBlock;
+}
+if (data_format == AmdGpu::DataFormat::Format16_16 &&
+num_format == AmdGpu::NumberFormat::Sint) {
+return vk::Format::eR16G16Sint;
+}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}
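These additions extend a plain two-key lookup: each (data_format, num_format) pair maps to exactly one vk::Format, and anything unmapped is fatal. A self-contained sketch of the pattern (the enums here are local stand-ins, not the real AmdGpu ones):

```cpp
#include <cstdio>

enum class DataFmt { F16x4, F32x2 }; // stand-in for AmdGpu::DataFormat
enum class NumFmt { Snorm, Uint };   // stand-in for AmdGpu::NumberFormat

const char* SurfaceFormat(DataFmt d, NumFmt n) {
    if (d == DataFmt::F16x4 && n == NumFmt::Snorm) {
        return "VK_FORMAT_R16G16B16A16_SNORM";
    }
    if (d == DataFmt::F32x2 && n == NumFmt::Uint) {
        return "VK_FORMAT_R32G32_UINT";
    }
    return "unknown"; // the real code hits UNREACHABLE_MSG here
}

int main() {
    std::puts(SurfaceFormat(DataFmt::F16x4, NumFmt::Snorm));
}
```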

View file

@@ -14,8 +14,8 @@ namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
-vk::ShaderModule module)
-: instance{instance_}, scheduler{scheduler_}, info{*info_} {
+u64 compute_key_, vk::ShaderModule module)
+: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} {
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
@@ -85,15 +85,15 @@ ComputePipeline::~ComputePipeline() = default;
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
-boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
-boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
+boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
+boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
u32 binding{};
for (const auto& buffer : info.buffers) {
-const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
+const auto vsharp = buffer.GetVsharp(info);
const u32 size = vsharp.GetSize();
-const VAddr address = vsharp.base_address.Value();
+const VAddr address = vsharp.base_address;
texture_cache.OnCpuWrite(address);
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
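GetVsharp's body is not shown in this diff, but judging by the ReadUd call it replaces, it is presumably a thin wrapper that reads the V# out of the tracked user-data registers. A hedged sketch with stand-in types:

```cpp
#include <cstdint>

// Stand-ins: the real types are AmdGpu::Buffer and Shader::Info.
struct Buffer {
    std::uint64_t raw[2];
};

struct BufferResource {
    std::uint32_t sgpr_base;
    std::uint32_t dword_offset;

    // Plausible shape of GetVsharp, inferred from the call it replaces:
    // info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset).
    template <typename Info>
    Buffer GetVsharp(const Info& info) const {
        return info.template ReadUd<Buffer>(sgpr_base, dword_offset);
    }
};
```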

View file

@@ -24,7 +24,7 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, const Shader::Info* info,
-vk::ShaderModule module);
+u64 compute_key, vk::ShaderModule module);
~ComputePipeline();
[[nodiscard]] vk::Pipeline Handle() const noexcept {
@@ -40,6 +40,7 @@ private:
vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout;
+u64 compute_key;
Shader::Info info{};
};

View file

@@ -47,7 +47,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
attributes.push_back({
.location = input.binding,
.binding = input.binding,
-.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
+.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
});
bindings.push_back({
@@ -326,8 +326,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
-const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
-const VAddr address = vsharp.base_address.Value();
+const auto vsharp = buffer.GetVsharp(stage);
+const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
@@ -419,8 +419,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
continue;
}
guest_buffers.emplace_back(buffer);
-ranges.emplace_back(buffer.base_address.Value(),
-buffer.base_address.Value() + buffer.GetSize());
+ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
return lhv.base_address < rhv.base_address;

View file

@@ -74,12 +74,12 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
available_extensions = GetSupportedExtensions(physical_device);
properties = physical_device.getProperties();
-CollectDeviceParameters();
ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
"Vulkan {}.{} is required, but only {}.{} is supported by device!",
VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion));
+CollectDeviceParameters();
CreateDevice();
CollectToolingInfo();
}
@@ -156,6 +156,7 @@ bool Instance::CreateDevice() {
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
+add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
@@ -208,12 +209,14 @@ bool Instance::CreateDevice() {
.shaderImageGatherExtended = true,
.shaderStorageImageMultisample = true,
.shaderClipDistance = features.shaderClipDistance,
+.shaderInt16 = true,
},
},
vk::PhysicalDeviceVulkan11Features{
.shaderDrawParameters = true,
},
vk::PhysicalDeviceVulkan12Features{
+.shaderFloat16 = true,
.scalarBlockLayout = true,
.uniformBufferStandardLayout = true,
.hostQueryReset = true,
@@ -237,7 +240,12 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
.depthClipControl = true,
},
-};
+vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
+.workgroupMemoryExplicitLayout = true,
+.workgroupMemoryExplicitLayoutScalarBlockLayout = true,
+.workgroupMemoryExplicitLayout8BitAccess = true,
+.workgroupMemoryExplicitLayout16BitAccess = true,
+}};
if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
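VK_KHR_workgroup_memory_explicit_layout is what lets the recompiler give shared (LDS) memory an explicit layout with 8/16-bit access in SPIR-V. The diff enables it unconditionally; a more defensive integration would probe the feature first via the standard Vulkan feature-chain query, sketched here with the C API:

```cpp
#include <vulkan/vulkan.h>

bool SupportsExplicitWorkgroupLayout(VkPhysicalDevice gpu) {
    VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR wgmem{};
    wgmem.sType =
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;

    VkPhysicalDeviceFeatures2 features2{};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &wgmem;

    vkGetPhysicalDeviceFeatures2(gpu, &features2);
    return wgmem.workgroupMemoryExplicitLayout == VK_TRUE;
}
```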

View file

@@ -18,6 +18,52 @@ extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
namespace Vulkan {
+using Shader::VsOutput;
+void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
+const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
+if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
+w != VsOutput::None) {
+info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
+}
+};
+// VS_OUT_MISC_VEC
+add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
+ctl.use_vtx_edge_flag
+? VsOutput::EdgeFlag
+: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
+ctl.use_vtx_kill_flag
+? VsOutput::KillFlag
+: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
+ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
+// VS_OUT_CCDIST0
+add_output(ctl.IsClipDistEnabled(0)
+? VsOutput::ClipDist0
+: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
+ctl.IsClipDistEnabled(1)
+? VsOutput::ClipDist1
+: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
+ctl.IsClipDistEnabled(2)
+? VsOutput::ClipDist2
+: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
+ctl.IsClipDistEnabled(3)
+? VsOutput::ClipDist3
+: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
+// VS_OUT_CCDIST1
+add_output(ctl.IsClipDistEnabled(4)
+? VsOutput::ClipDist4
+: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
+ctl.IsClipDistEnabled(5)
+? VsOutput::ClipDist5
+: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
+ctl.IsClipDistEnabled(6)
+? VsOutput::ClipDist6
+: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
+ctl.IsClipDistEnabled(7)
+? VsOutput::ClipDist7
+: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
+}
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
@@ -26,6 +72,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
switch (stage) {
case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs;
+BuildVsOutputs(info, regs.vs_output_control);
break;
}
case Shader::Stage::Fragment: {
@@ -45,6 +92,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
info.num_user_data = cs_pgm.settings.num_user_regs;
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full};
+info.shared_memory_size = cs_pgm.SharedMemSize();
break;
}
default:
@@ -60,6 +108,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
profile = Shader::Profile{
.supported_spirv = 0x00010600U,
+.support_explicit_workgroup_layout = true,
};
}
@@ -153,7 +202,7 @@ void PipelineCache::RefreshGraphicsKey() {
for (u32 i = 0; i < MaxShaderStages; i++) {
auto* pgm = regs.ProgramForStage(i);
-if (!pgm || !pgm->Address<u32>()) {
+if (!pgm || !pgm->Address<u32*>()) {
key.stage_hashes[i] = 0;
continue;
}
@@ -209,7 +258,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
-const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+info.pgm_base = pgm->Address<uintptr_t>();
+info.pgm_hash = hash;
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V
@@ -247,8 +298,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-const Shader::Info info =
+Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
+info.pgm_base = cs_pgm.Address<uintptr_t>();
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V
@@ -258,8 +310,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
}
const auto module = CompileSPV(spv_code, instance.GetDevice());
+// Set module name to hash in renderdoc
+const auto name = fmt::format("cs_{:#x}", compute_key);
+Vulkan::SetObjectName(instance.GetDevice(), module, name);
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
-&program.info, module);
+&program.info, compute_key, module);
} catch (const Shader::Exception& e) {
UNREACHABLE_MSG("{}", e.what());
return nullptr;
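To make BuildVsOutputs concrete: each of the three export vectors (MISC_VEC, CCDIST0, CCDIST1) is emitted only if at least one of its four slots is active. A self-contained sketch of that filtering with local stand-in types:

```cpp
#include <array>
#include <cassert>
#include <vector>

enum class VsOutput { None, PointSprite, ClipDist0, ClipDist1 }; // abridged
using VsOutputMap = std::array<VsOutput, 4>;

int main() {
    std::vector<VsOutputMap> vs_outputs;
    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
        if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
            w != VsOutput::None) {
            vs_outputs.push_back({x, y, z, w});
        }
    };
    // Example state: point size in use, clip distances 0-1 enabled, rest off.
    add_output(VsOutput::PointSprite, VsOutput::None, VsOutput::None, VsOutput::None);
    add_output(VsOutput::ClipDist0, VsOutput::ClipDist1, VsOutput::None, VsOutput::None);
    add_output(VsOutput::None, VsOutput::None, VsOutput::None, VsOutput::None); // CCDIST1
    assert(vs_outputs.size() == 2); // the all-None vector is dropped
}
```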

View file

@@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
-vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
+vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
@@ -44,11 +44,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
return;
}
-UpdateDynamicState(*pipeline);
-pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+try {
+pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+} catch (...) {
+UNREACHABLE();
+}
BeginRendering();
+UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (is_indexed) {
@@ -71,9 +74,14 @@ void Rasterizer::DispatchDirect() {
return;
}
-const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
-if (!has_resources) {
-return;
+try {
+const auto has_resources =
+pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+if (!has_resources) {
+return;
+}
+} catch (...) {
+UNREACHABLE();
}
scheduler.EndRendering();
@@ -163,7 +171,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
// Upload index data to stream buffer.
const auto index_address = regs.index_base_address.Address<const void*>();
-const u32 index_buffer_size = regs.num_indices * index_size;
+const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
std::memcpy(data, index_address, index_buffer_size);
vertex_index_buffer.Commit(index_buffer_size);
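The SetupIndexBuffer change matters for draws that start at a nonzero index_offset: the GPU reads indices index_offset..index_offset+num_indices-1 relative to the base address, so the upload has to cover the full prefix. A worked example:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t index_size = sizeof(std::uint16_t); // 16-bit indices
    const std::uint32_t index_offset = 100;
    const std::uint32_t num_indices = 300;
    // Old size: stops 100 indices short of what the draw actually reads.
    assert(num_indices * index_size == 600);
    // New size: covers everything up to the last index the draw touches.
    assert((index_offset + num_indices) * index_size == 800);
}
```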

View file

@@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
-// scheduler.Wait(watch.tick);
+scheduler.Wait(watch.tick);
++wait_cursor;
}
}

View file

@@ -321,12 +321,15 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// Copy to the image.
+const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
+? vk::ImageAspectFlagBits::eDepth
+: aspect_mask;
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = info.pitch,
.bufferImageHeight = info.size.height,
.imageSubresource{
-.aspectMask = aspect_mask,
+.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
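Background for the aspect change above: Vulkan requires each buffer-image copy region to name exactly one aspect, so an upload into a combined depth-stencil image cannot use Depth|Stencil. The diff prefers the depth aspect; the selection boils down to:

```cpp
#include <vulkan/vulkan.hpp>

// For combined depth-stencil images, vkCmdCopyBufferToImage regions must
// target a single aspect; strip the stencil bit and upload depth only,
// as Image::Upload now does.
vk::ImageAspectFlags CopyAspect(vk::ImageAspectFlags aspect_mask) {
    if (aspect_mask & vk::ImageAspectFlagBits::eStencil) {
        return vk::ImageAspectFlagBits::eDepth;
    }
    return aspect_mask;
}
```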

View file

@@ -77,7 +77,6 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
if (usage_override) {
usage_ci.usage = usage_override.value();
}
-// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
vk::Format format = info.format;
vk::ImageAspectFlags aspect = image.aspect_mask;

View file

@@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
RegisterImage(image_id);
} else {
-image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
+image_id = image_ids[0];
}
RegisterMeta(info, image_id);
Image& image = slot_images[image_id];
-if (True(image.flags & ImageFlagBits::CpuModified)) {
+if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image);
TrackImage(image, image_id);
}
@@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
-const ImageId image_id = FindImage(info, buffer.Address(), false);
+const ImageId image_id = FindImage(info, buffer.Address());
Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified;

View file

@@ -179,19 +179,19 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR8Unorm:
return vk::Format::eR8Uint;
case vk::Format::eR8G8Unorm:
+case vk::Format::eR16Sfloat:
return vk::Format::eR8G8Uint;
case vk::Format::eR8G8B8A8Srgb:
-[[fallthrough]];
case vk::Format::eB8G8R8A8Srgb:
-[[fallthrough]];
case vk::Format::eB8G8R8A8Unorm:
-[[fallthrough]];
case vk::Format::eR8G8B8A8Unorm:
+case vk::Format::eR32Sfloat:
+case vk::Format::eR32Uint:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock:
+case vk::Format::eR32G32Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc3SrgbBlock:
-[[fallthrough]];
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock: