Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-06-08 03:33:14 +00:00
shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)
* shader_recompiler: Add LDEXP
* shader_recompiler: Add most image integer atomic ops
* shader_recompiler: Implement shared memory load/store
* shader_recompiler: More image atomics
* externals: Update sirit
* clang format
* cmake: Add missing files
* shader_recompiler: Fix some atomic bugs
* shader_recompiler: Vs outputs
* shader_recompiler: Shared mem has side-effects, fix format component order
* shader_recompiler: Inline constant buffer impl
* video_core: Fix regressions
* Work
* Fixup a few things
parent af3bbc33e9
commit 6ceab6dfac
69 changed files with 1597 additions and 310 deletions
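For readers new to the GCN side of this change: the shared memory and barrier work in the list above targets one hardware model — compute lanes exchange data through LDS (local data share) and order those accesses with s_barrier. A minimal CPU analogy in C++ (illustrative only, not emulator code: one thread per lane, an array standing in for LDS, std::barrier standing in for s_barrier):

    #include <array>
    #include <barrier>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
        constexpr int workgroup_size = 8;
        std::array<int, workgroup_size> lds{}; // stand-in for GPU shared memory
        std::barrier sync{workgroup_size};     // stand-in for s_barrier

        std::vector<std::jthread> lanes;
        for (int id = 0; id < workgroup_size; ++id) {
            lanes.emplace_back([&, id] {
                lds[id] = id * id;      // DS write: each lane stores its own slot
                sync.arrive_and_wait(); // workgroup barrier: all writes now visible
                const int neighbour = lds[(id + 1) % workgroup_size]; // DS read
                std::printf("lane %d sees %d\n", id, neighbour);
            });
        }
    } // jthreads join on destruction

The recompiler has to reproduce exactly this ordering guarantee when it lowers DS load/store and s_barrier to SPIR-V workgroup memory and OpControlBarrier.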
@@ -392,6 +392,36 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
         num_format == AmdGpu::NumberFormat::Float) {
         return vk::Format::eR16G16Sfloat;
     }
+    if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
+        num_format == AmdGpu::NumberFormat::Snorm) {
+        return vk::Format::eR16G16B16A16Snorm;
+    }
+    if (data_format == AmdGpu::DataFormat::Format32_32 &&
+        num_format == AmdGpu::NumberFormat::Uint) {
+        return vk::Format::eR32G32Uint;
+    }
+    if (data_format == AmdGpu::DataFormat::Format4_4_4_4 &&
+        num_format == AmdGpu::NumberFormat::Unorm) {
+        return vk::Format::eR4G4B4A4UnormPack16;
+    }
+    if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
+        num_format == AmdGpu::NumberFormat::Uint) {
+        return vk::Format::eR16G16B16A16Uint;
+    }
+    if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
+        num_format == AmdGpu::NumberFormat::Uint) {
+        return vk::Format::eR32G32B32A32Uint;
+    }
+    if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) {
+        return vk::Format::eR8Sint;
+    }
+    if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Srgb) {
+        return vk::Format::eBc1RgbaSrgbBlock;
+    }
+    if (data_format == AmdGpu::DataFormat::Format16_16 &&
+        num_format == AmdGpu::NumberFormat::Sint) {
+        return vk::Format::eR16G16Sint;
+    }
     UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
 }

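Not part of the patch: SurfaceFormat keeps growing one if per (data format, number format) pair. The same mapping fits naturally in a table; a sketch assuming the AmdGpu and vk enums seen above are in scope (LookupSurfaceFormat and format_table are hypothetical names):

    #include <optional>
    #include <utility>

    using FormatKey = std::pair<AmdGpu::DataFormat, AmdGpu::NumberFormat>;

    // Data-driven equivalent of the if-chain; extend by adding rows.
    static constexpr std::pair<FormatKey, vk::Format> format_table[] = {
        {{AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm},
         vk::Format::eR16G16B16A16Snorm},
        {{AmdGpu::DataFormat::Format32_32, AmdGpu::NumberFormat::Uint}, vk::Format::eR32G32Uint},
        // ... remaining pairs as in the function above
    };

    std::optional<vk::Format> LookupSurfaceFormat(AmdGpu::DataFormat dfmt,
                                                  AmdGpu::NumberFormat nfmt) {
        for (const auto& [key, fmt] : format_table) {
            if (key == FormatKey{dfmt, nfmt}) {
                return fmt;
            }
        }
        return std::nullopt; // caller decides whether a miss is UNREACHABLE
    }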
@@ -14,8 +14,8 @@ namespace Vulkan {

 ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
                                  vk::PipelineCache pipeline_cache, const Shader::Info* info_,
-                                 vk::ShaderModule module)
-    : instance{instance_}, scheduler{scheduler_}, info{*info_} {
+                                 u64 compute_key_, vk::ShaderModule module)
+    : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} {
     const vk::PipelineShaderStageCreateInfo shader_ci = {
         .stage = vk::ShaderStageFlagBits::eCompute,
         .module = module,
@@ -85,15 +85,15 @@ ComputePipeline::~ComputePipeline() = default;
 bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
-    boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
-    boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
+    boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
+    boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     u32 binding{};

     for (const auto& buffer : info.buffers) {
-        const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
+        const auto vsharp = buffer.GetVsharp(info);
         const u32 size = vsharp.GetSize();
-        const VAddr address = vsharp.base_address.Value();
+        const VAddr address = vsharp.base_address;
         texture_cache.OnCpuWrite(address);
         const u32 offset = staging.Copy(address, size,
                                         buffer.is_storage ? instance.StorageMinAlignment()
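Judging by the ReadUd lines removed here and in GraphicsPipeline::BindResources below, the new GetVsharp is presumably a thin accessor on the buffer-resource record that fetches the V# (buffer descriptor) from the tracked user-data registers. A hypothetical sketch of its shape (the patch shows only the call sites):

    // Hypothetical; sgpr_base and dword_offset are the fields the old call
    // sites passed to ReadUd explicitly.
    AmdGpu::Buffer GetVsharp(const Shader::Info& info) const noexcept {
        return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
    }

Hoisting the call behind the resource record keeps the sgpr/offset bookkeeping in one place instead of at every bind site.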
@@ -24,7 +24,7 @@ class ComputePipeline {
 public:
     explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
                              vk::PipelineCache pipeline_cache, const Shader::Info* info,
-                             vk::ShaderModule module);
+                             u64 compute_key, vk::ShaderModule module);
     ~ComputePipeline();

     [[nodiscard]] vk::Pipeline Handle() const noexcept {
@@ -40,6 +40,7 @@ private:
     vk::UniquePipeline pipeline;
     vk::UniquePipelineLayout pipeline_layout;
     vk::UniqueDescriptorSetLayout desc_layout;
+    u64 compute_key;
     Shader::Info info{};
 };

@@ -47,7 +47,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
         attributes.push_back({
             .location = input.binding,
             .binding = input.binding,
-            .format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
+            .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
             .offset = 0,
         });
         bindings.push_back({
@@ -326,8 +326,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&

     for (const auto& stage : stages) {
         for (const auto& buffer : stage.buffers) {
-            const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
-            const VAddr address = vsharp.base_address.Value();
+            const auto vsharp = buffer.GetVsharp(stage);
+            const VAddr address = vsharp.base_address;
             const u32 size = vsharp.GetSize();
             const u32 offset = staging.Copy(address, size,
                                             buffer.is_storage ? instance.StorageMinAlignment()
@@ -419,8 +419,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
             continue;
         }
         guest_buffers.emplace_back(buffer);
-        ranges.emplace_back(buffer.base_address.Value(),
-                            buffer.base_address.Value() + buffer.GetSize());
+        ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
     }
     std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
         return lhv.base_address < rhv.base_address;
@@ -74,12 +74,12 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,

     available_extensions = GetSupportedExtensions(physical_device);
     properties = physical_device.getProperties();
+    CollectDeviceParameters();
     ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
                "Vulkan {}.{} is required, but only {}.{} is supported by device!",
                VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
                VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion));

-    CollectDeviceParameters();
     CreateDevice();
     CollectToolingInfo();
 }
@@ -156,6 +156,7 @@ bool Instance::CreateDevice() {
     add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
     add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
     add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
+    add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
     // The next two extensions are required to be available together in order to support write masks
     color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
     color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
@@ -208,12 +209,14 @@ bool Instance::CreateDevice() {
             .shaderImageGatherExtended = true,
             .shaderStorageImageMultisample = true,
             .shaderClipDistance = features.shaderClipDistance,
+            .shaderInt16 = true,
         },
     },
     vk::PhysicalDeviceVulkan11Features{
         .shaderDrawParameters = true,
     },
     vk::PhysicalDeviceVulkan12Features{
+        .shaderFloat16 = true,
         .scalarBlockLayout = true,
         .uniformBufferStandardLayout = true,
         .hostQueryReset = true,
@@ -237,7 +240,12 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceDepthClipControlFeaturesEXT{
             .depthClipControl = true,
         },
-    };
+        vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
+            .workgroupMemoryExplicitLayout = true,
+            .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
+            .workgroupMemoryExplicitLayout8BitAccess = true,
+            .workgroupMemoryExplicitLayout16BitAccess = true,
+        }};

     if (!color_write_en) {
         device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
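The new feature struct is chained unconditionally, so device creation will fail on drivers that do not expose VK_KHR_workgroup_memory_explicit_layout. A hedged sketch (not in the patch) of probing support first, using the same vulkan-hpp StructureChain/unlink machinery visible above:

    // Query the physical device and drop the feature struct when unsupported,
    // mirroring how color_write_en is handled for the write-mask extensions.
    const auto feature_chain =
        physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
                                     vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
    const auto& wgml =
        feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
    if (!wgml.workgroupMemoryExplicitLayout) {
        device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
    }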
@@ -18,6 +18,52 @@ extern std::unique_ptr<Vulkan::RendererVulkan> renderer;

 namespace Vulkan {

+using Shader::VsOutput;
+
+void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
+    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
+        if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
+            w != VsOutput::None) {
+            info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
+        }
+    };
+    // VS_OUT_MISC_VEC
+    add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
+               ctl.use_vtx_edge_flag
+                   ? VsOutput::EdgeFlag
+                   : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
+               ctl.use_vtx_kill_flag
+                   ? VsOutput::KillFlag
+                   : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
+               ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
+    // VS_OUT_CCDIST0
+    add_output(ctl.IsClipDistEnabled(0)
+                   ? VsOutput::ClipDist0
+                   : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
+               ctl.IsClipDistEnabled(1)
+                   ? VsOutput::ClipDist1
+                   : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
+               ctl.IsClipDistEnabled(2)
+                   ? VsOutput::ClipDist2
+                   : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
+               ctl.IsClipDistEnabled(3)
+                   ? VsOutput::ClipDist3
+                   : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
+    // VS_OUT_CCDIST1
+    add_output(ctl.IsClipDistEnabled(4)
+                   ? VsOutput::ClipDist4
+                   : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
+               ctl.IsClipDistEnabled(5)
+                   ? VsOutput::ClipDist5
+                   : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
+               ctl.IsClipDistEnabled(6)
+                   ? VsOutput::ClipDist6
+                   : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
+               ctl.IsClipDistEnabled(7)
+                   ? VsOutput::ClipDist7
+                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
+}
+
 Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
                             const AmdGpu::Liverpool::Regs& regs) {
     Shader::Info info{};
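Each add_output call above fills one hardware export slot (VS_OUT_MISC_VEC, VS_OUT_CCDIST0, VS_OUT_CCDIST1) with up to four tagged components. Judging from the brace-init at the emplace_back call, VsOutputMap is presumably a fixed four-slot mapping along these lines (the real declaration lives in the shader_recompiler headers, not in this diff):

    // Hypothetical shape consistent with Shader::VsOutputMap{x, y, z, w} above.
    using VsOutputMap = std::array<VsOutput, 4>;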
@@ -26,6 +72,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
     switch (stage) {
     case Shader::Stage::Vertex: {
         info.num_user_data = regs.vs_program.settings.num_user_regs;
+        BuildVsOutputs(info, regs.vs_output_control);
         break;
     }
     case Shader::Stage::Fragment: {
@@ -45,6 +92,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
         info.num_user_data = cs_pgm.settings.num_user_regs;
         info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
                                cs_pgm.num_thread_z.full};
+        info.shared_memory_size = cs_pgm.SharedMemSize();
         break;
     }
     default:
@@ -60,6 +108,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
     pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
     profile = Shader::Profile{
         .supported_spirv = 0x00010600U,
+        .support_explicit_workgroup_layout = true,
     };
 }

@@ -153,7 +202,7 @@ void PipelineCache::RefreshGraphicsKey() {

     for (u32 i = 0; i < MaxShaderStages; i++) {
         auto* pgm = regs.ProgramForStage(i);
-        if (!pgm || !pgm->Address<u32>()) {
+        if (!pgm || !pgm->Address<u32*>()) {
             key.stage_hashes[i] = 0;
             continue;
         }
@@ -209,7 +258,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         // Recompile shader to IR.
         try {
             LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
-            const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+            Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+            info.pgm_base = pgm->Address<uintptr_t>();
+            info.pgm_hash = hash;
             programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

             // Compile IR to SPIR-V
@@ -247,8 +298,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
     // Recompile shader to IR.
     try {
         LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-        const Shader::Info info =
+        Shader::Info info =
             MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
+        info.pgm_base = cs_pgm.Address<uintptr_t>();
         auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

         // Compile IR to SPIR-V
@@ -258,8 +310,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
             DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
         }
         const auto module = CompileSPV(spv_code, instance.GetDevice());
+        // Set module name to hash in renderdoc
+        const auto name = fmt::format("cs_{:#x}", compute_key);
+        Vulkan::SetObjectName(instance.GetDevice(), module, name);
         return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
-                                                 &program.info, module);
+                                                 &program.info, compute_key, module);
     } catch (const Shader::Exception& e) {
         UNREACHABLE_MSG("{}", e.what());
         return nullptr;
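SetObjectName is not defined in this diff; it presumably wraps vkSetDebugUtilsObjectNameEXT so RenderDoc and similar tools display the shader hash. A hedged sketch of such a wrapper in vulkan-hpp terms (names and exact casts are assumptions, not the repo's code):

    // Hypothetical helper: attach a VK_EXT_debug_utils name to any vulkan-hpp
    // handle type T (T::objectType and T::CType exist on all handle wrappers).
    template <typename T>
    void SetObjectName(vk::Device device, const T& handle, const std::string& name) {
        const vk::DebugUtilsObjectNameInfoEXT name_info = {
            .objectType = T::objectType,
            .objectHandle = reinterpret_cast<u64>(static_cast<typename T::CType>(handle)),
            .pObjectName = name.c_str(),
        };
        device.setDebugUtilsObjectNameEXT(name_info);
    }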
@@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
     : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
       liverpool{liverpool_}, memory{Core::Memory::Instance()},
       pipeline_cache{instance, scheduler, liverpool},
-      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
+      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
     if (!Config::nullGpu()) {
         liverpool->BindRasterizer(this);
     }
@@ -44,11 +44,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         return;
     }

-    UpdateDynamicState(*pipeline);
-
-    pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+    try {
+        pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+    } catch (...) {
+        UNREACHABLE();
+    }

     BeginRendering();
+    UpdateDynamicState(*pipeline);

     cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
     if (is_indexed) {
@@ -71,9 +74,14 @@ void Rasterizer::DispatchDirect() {
         return;
     }

-    const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
-    if (!has_resources) {
-        return;
+    try {
+        const auto has_resources =
+            pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+        if (!has_resources) {
+            return;
+        }
+    } catch (...) {
+        UNREACHABLE();
     }

     scheduler.EndRendering();
@@ -163,7 +171,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {

     // Upload index data to stream buffer.
     const auto index_address = regs.index_base_address.Address<const void*>();
-    const u32 index_buffer_size = regs.num_indices * index_size;
+    const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
     const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
     std::memcpy(data, index_address, index_buffer_size);
     vertex_index_buffer.Commit(index_buffer_size);
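The size fix matters because the upload is indexed from its base, while the draw starts reading index_offset entries in. A worked example of the corrected arithmetic (u16/u32 are the repo's fixed-width aliases):

    // index_offset = 100, num_indices = 50, 16-bit indices:
    // old size: 50 * 2 = 100 bytes        -> the draw reads past the upload
    // new size: (100 + 50) * 2 = 300 bytes -> covers index entries [0, 150)
    constexpr u32 index_offset = 100;
    constexpr u32 num_indices = 50;
    constexpr u32 index_size = sizeof(u16);
    static_assert((index_offset + num_indices) * index_size == 300);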
@@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
     while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
         auto& watch = previous_watches[wait_cursor];
         wait_bound = watch.upper_bound;
-        // scheduler.Wait(watch.tick);
+        scheduler.Wait(watch.tick);
         ++wait_cursor;
     }
 }