shader_recompiler: Small instruction parsing refactor/bugfixes (#340)

* translator: Implemtn f32 to f16 convert

* shader_recompiler: Add bit instructions

* shader_recompiler: More data share instructions

* shader_recompiler: Remove exec contexts, fix S_MOV_B64

* shader_recompiler: Split instruction parsing into categories

* shader_recompiler: Better BFS search

* shader_recompiler: Constant propagation pass for cmp_class_f32

* shader_recompiler: Partial readfirstlane implementation

* shader_recompiler: Stub readlane/writelane only for non-compute

* hack: Fix swizzle on RDR

* Will properly fix this when merging this

* clang format

* address_space: Bump user area size to full

* shader_recompiler: V_INTERP_MOV_F32

* Should work the same as spirv will emit flat decoration on demand

* kernel: Add MAP_OP_MAP_FLEXIBLE

* image_view: Attempt to apply storage swizzle on format

* vk_scheduler: Barrier attachments on renderpass end

* clang format

* liverpool: cs state backup

* shader_recompiler: More instructions and formats

* vector_alu: Proper V_MBCNT_U32_B32

* shader_recompiler: Port some dark souls things

* file_system: Implement sceKernelRename

* more formats

* clang format

* resource_tracking_pass: Back to assert

* translate: Tracedata

* kernel: Remove tracy lock

* Solves random crashes in Dark Souls

* code: Review comments
This commit is contained in:
TheTurtle 2024-07-31 00:32:40 +03:00 committed by GitHub
parent ac6dc20c3b
commit a7c9bfa5c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 1349 additions and 904 deletions

View file

@ -297,6 +297,7 @@ std::span<const vk::Format> GetAllFormats() {
vk::Format::eBc3UnormBlock,
vk::Format::eBc4UnormBlock,
vk::Format::eBc5UnormBlock,
vk::Format::eBc5SnormBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eBc7UnormBlock,
vk::Format::eD16Unorm,
@ -308,6 +309,7 @@ std::span<const vk::Format> GetAllFormats() {
vk::Format::eR8G8B8A8Srgb,
vk::Format::eR8G8B8A8Uint,
vk::Format::eR8G8B8A8Unorm,
vk::Format::eR8G8B8A8Snorm,
vk::Format::eR8G8B8A8Uscaled,
vk::Format::eR8G8Snorm,
vk::Format::eR8G8Uint,
@ -335,6 +337,10 @@ std::span<const vk::Format> GetAllFormats() {
vk::Format::eR32Sfloat,
vk::Format::eR32Sint,
vk::Format::eR32Uint,
vk::Format::eBc6HUfloatBlock,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16B16A16Sscaled,
vk::Format::eR16G16Sscaled,
};
return formats;
}
@ -384,10 +390,17 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc5UnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eBc5SnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16B16A16Sint;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Sscaled) {
return vk::Format::eR16G16B16A16Sscaled;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16Sfloat;
@ -496,6 +509,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16Sint;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&
num_format == AmdGpu::NumberFormat::Sscaled) {
return vk::Format::eR16G16Sscaled;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Uscaled) {
return vk::Format::eR8G8B8A8Uscaled;
@ -518,6 +535,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::SnormNz) {
return vk::Format::eR16G16B16A16Snorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR8G8B8A8Snorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc6HUfloatBlock;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View file

@ -148,7 +148,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
}
}
for (const auto& sampler : info.samplers) {
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
const auto ssharp = sampler.GetSsharp(info);
const auto vk_sampler = texture_cache.GetSampler(ssharp);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({

View file

@ -386,7 +386,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
}
}
for (const auto& sampler : stage.samplers) {
auto ssharp = stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
auto ssharp = sampler.GetSsharp(stage);
if (sampler.disable_aniso) {
const auto& tsharp = tsharps[sampler.associated_image];
if (tsharp.base_level == 0 && tsharp.last_level == 0) {

View file

@ -164,10 +164,11 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceVulkan13Features,
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
const vk::StructureChain properties_chain =
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
const vk::StructureChain properties_chain = physical_device.getProperties2<
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
features = feature_chain.get().features;
if (available_extensions.empty()) {
@ -261,6 +262,7 @@ bool Instance::CreateDevice() {
.shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
.shaderStorageImageMultisample = features.shaderStorageImageMultisample,
.shaderClipDistance = features.shaderClipDistance,
.shaderInt64 = features.shaderInt64,
.shaderInt16 = features.shaderInt16,
},
},

View file

@ -188,6 +188,11 @@ public:
return properties.limits.nonCoherentAtomSize;
}
/// Returns the subgroup size of the selected physical device.
u32 SubgroupSize() const {
return subgroup_size;
}
/// Returns the maximum supported elements in a texel buffer
u32 MaxTexelBufferElements() const {
return properties.limits.maxTexelBufferElements;
@ -249,6 +254,7 @@ private:
bool workgroup_memory_explicit_layout{};
bool color_write_en{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};
bool debug_utils_supported{};
bool has_nsight_graphics{};

View file

@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
profile = Shader::Profile{
.supported_spirv = 0x00010600U,
.subgroup_size = instance.SubgroupSize(),
.support_explicit_workgroup_layout = true,
};
}
@ -268,7 +269,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
info.pgm_base = pgm->Address<uintptr_t>();
info.pgm_hash = hash;
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
programs[i] =
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
// Compile IR to SPIR-V
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
@ -308,7 +310,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
info.pgm_base = cs_pgm.Address<uintptr_t>();
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
auto program =
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
// Compile IR to SPIR-V
u32 binding{};

View file

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
@ -128,6 +128,7 @@ void Rasterizer::BeginRendering() {
state.height = std::min<u32>(state.height, image.info.size.height);
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
state.color_images[state.num_color_attachments] = image.image;
state.color_attachments[state.num_color_attachments++] = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
@ -152,6 +153,7 @@ void Rasterizer::BeginRendering() {
const auto& image = texture_cache.GetImage(image_view.image_id);
state.width = std::min<u32>(state.width, image.info.size.width);
state.height = std::min<u32>(state.height, image.info.size.height);
state.depth_image = image.image;
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = image.layout,

View file

@ -50,7 +50,32 @@ void Scheduler::EndRendering() {
return;
}
is_rendering = false;
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
barriers.push_back(vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = render_state.color_images[i],
.subresourceRange =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
}
current_cmdbuf.endRendering();
if (!barriers.empty()) {
current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eFragmentShader,
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
}
}
void Scheduler::Flush(SubmitInfo& info) {

View file

@ -15,7 +15,9 @@ class Instance;
struct RenderState {
std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
std::array<vk::Image, 8> color_images{};
vk::RenderingAttachmentInfo depth_attachment{};
vk::Image depth_image{};
u32 num_color_attachments{};
u32 num_depth_attachments{};
u32 width = std::numeric_limits<u32>::max();