Mirror of https://github.com/shadps4-emu/shadPS4.git — synced 2025-06-08 03:33:14 +00:00
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implement f32 to f16 convert
* shader_recompiler: Add bit instructions
* shader_recompiler: More data share instructions
* shader_recompiler: Remove exec contexts, fix S_MOV_B64
* shader_recompiler: Split instruction parsing into categories
* shader_recompiler: Better BFS search
* shader_recompiler: Constant propagation pass for cmp_class_f32
* shader_recompiler: Partial readfirstlane implementation
* shader_recompiler: Stub readlane/writelane only for non-compute
* hack: Fix swizzle on RDR (will be fixed properly after this is merged)
* clang format
* address_space: Bump user area size to full
* shader_recompiler: V_INTERP_MOV_F32 (should work the same, since SPIR-V will emit the flat decoration on demand)
* kernel: Add MAP_OP_MAP_FLEXIBLE
* image_view: Attempt to apply storage swizzle on format
* vk_scheduler: Barrier attachments on renderpass end
* clang format
* liverpool: cs state backup
* shader_recompiler: More instructions and formats
* vector_alu: Proper V_MBCNT_U32_B32
* shader_recompiler: Port some Dark Souls things
* file_system: Implement sceKernelRename
* more formats
* clang format
* resource_tracking_pass: Back to assert
* translate: Tracedata
* kernel: Remove tracy lock (solves random crashes in Dark Souls)
* code: Review comments
This commit is contained in:
parent ac6dc20c3b
commit a7c9bfa5c5

66 changed files with 1349 additions and 904 deletions
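Of the translator changes listed above, V_MBCNT_U32_B32 has the easiest semantics to summarize: each lane receives the population count of the given mask restricted to lanes below its own index, plus an accumulator operand. The following is a minimal, hypothetical model of the low-half behaviour in plain C++ — it is not the emulator's IR-emission code, and the function and parameter names are illustrative only:

```cpp
#include <bit>
#include <cstdint>

// Hypothetical per-lane model of V_MBCNT_LO_U32_B32 (assumes lane_id < 32):
// count the set bits of `mask` belonging to lanes with a lower index than
// this lane, then add the second source operand `base`.
std::uint32_t MbcntLoU32B32(std::uint32_t mask, std::uint32_t base, std::uint32_t lane_id) {
    // Lane 0 sees no lower lanes; otherwise keep only bits below lane_id.
    const std::uint32_t lower_mask = (lane_id == 0) ? 0u : (mask & ((1u << lane_id) - 1u));
    return static_cast<std::uint32_t>(std::popcount(lower_mask)) + base;
}

// Example: with a full 32-lane mask and base 0, lane N simply receives N:
// MbcntLoU32B32(0xFFFFFFFFu, 0u, 5u) == 5
```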
@@ -297,6 +297,7 @@ std::span<const vk::Format> GetAllFormats() {
        vk::Format::eBc3UnormBlock,
        vk::Format::eBc4UnormBlock,
        vk::Format::eBc5UnormBlock,
        vk::Format::eBc5SnormBlock,
        vk::Format::eBc7SrgbBlock,
        vk::Format::eBc7UnormBlock,
        vk::Format::eD16Unorm,
@@ -308,6 +309,7 @@ std::span<const vk::Format> GetAllFormats() {
        vk::Format::eR8G8B8A8Srgb,
        vk::Format::eR8G8B8A8Uint,
        vk::Format::eR8G8B8A8Unorm,
        vk::Format::eR8G8B8A8Snorm,
        vk::Format::eR8G8B8A8Uscaled,
        vk::Format::eR8G8Snorm,
        vk::Format::eR8G8Uint,
@@ -335,6 +337,10 @@ std::span<const vk::Format> GetAllFormats() {
        vk::Format::eR32Sfloat,
        vk::Format::eR32Sint,
        vk::Format::eR32Uint,
        vk::Format::eBc6HUfloatBlock,
        vk::Format::eR16G16Unorm,
        vk::Format::eR16G16B16A16Sscaled,
        vk::Format::eR16G16Sscaled,
    };
    return formats;
}
@@ -384,10 +390,17 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
    if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
        return vk::Format::eBc5UnormBlock;
    }
    if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) {
        return vk::Format::eBc5SnormBlock;
    }
    if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
        num_format == AmdGpu::NumberFormat::Sint) {
        return vk::Format::eR16G16B16A16Sint;
    }
    if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
        num_format == AmdGpu::NumberFormat::Sscaled) {
        return vk::Format::eR16G16B16A16Sscaled;
    }
    if (data_format == AmdGpu::DataFormat::Format16_16 &&
        num_format == AmdGpu::NumberFormat::Float) {
        return vk::Format::eR16G16Sfloat;
@@ -496,6 +509,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
        num_format == AmdGpu::NumberFormat::Sint) {
        return vk::Format::eR16G16Sint;
    }
    if (data_format == AmdGpu::DataFormat::Format16_16 &&
        num_format == AmdGpu::NumberFormat::Sscaled) {
        return vk::Format::eR16G16Sscaled;
    }
    if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
        num_format == AmdGpu::NumberFormat::Uscaled) {
        return vk::Format::eR8G8B8A8Uscaled;
@@ -518,6 +535,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
        num_format == AmdGpu::NumberFormat::SnormNz) {
        return vk::Format::eR16G16B16A16Snorm;
    }
    if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
        num_format == AmdGpu::NumberFormat::Snorm) {
        return vk::Format::eR8G8B8A8Snorm;
    }
    if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) {
        return vk::Format::eBc6HUfloatBlock;
    }
    UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

@@ -148,7 +148,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
        }
    }
    for (const auto& sampler : info.samplers) {
-       const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
+       const auto ssharp = sampler.GetSsharp(info);
        const auto vk_sampler = texture_cache.GetSampler(ssharp);
        image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
        set_writes.push_back({
@@ -386,7 +386,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
        }
    }
    for (const auto& sampler : stage.samplers) {
-       auto ssharp = stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
+       auto ssharp = sampler.GetSsharp(stage);
        if (sampler.disable_aniso) {
            const auto& tsharp = tsharps[sampler.associated_image];
            if (tsharp.base_level == 0 && tsharp.last_level == 0) {
@@ -164,10 +164,11 @@ bool Instance::CreateDevice() {
                            vk::PhysicalDeviceVulkan13Features,
                            vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
                            vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
-   const vk::StructureChain properties_chain =
-       physical_device.getProperties2<vk::PhysicalDeviceProperties2,
-                                      vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
-                                      vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
+   const vk::StructureChain properties_chain = physical_device.getProperties2<
+       vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
+       vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
+   subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
+   LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);

    features = feature_chain.get().features;
    if (available_extensions.empty()) {
@@ -261,6 +262,7 @@ bool Instance::CreateDevice() {
            .shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
            .shaderStorageImageMultisample = features.shaderStorageImageMultisample,
            .shaderClipDistance = features.shaderClipDistance,
            .shaderInt64 = features.shaderInt64,
            .shaderInt16 = features.shaderInt16,
        },
    },
@@ -188,6 +188,11 @@ public:
        return properties.limits.nonCoherentAtomSize;
    }

    /// Returns the subgroup size of the selected physical device.
    u32 SubgroupSize() const {
        return subgroup_size;
    }

    /// Returns the maximum supported elements in a texel buffer
    u32 MaxTexelBufferElements() const {
        return properties.limits.maxTexelBufferElements;
@@ -249,6 +254,7 @@ private:
    bool workgroup_memory_explicit_layout{};
    bool color_write_en{};
    u64 min_imported_host_pointer_alignment{};
    u32 subgroup_size{};
    bool tooling_info{};
    bool debug_utils_supported{};
    bool has_nsight_graphics{};
@@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
    pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
    profile = Shader::Profile{
        .supported_spirv = 0x00010600U,
        .subgroup_size = instance.SubgroupSize(),
        .support_explicit_workgroup_layout = true,
    };
}
@@ -268,7 +269,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
        Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
        info.pgm_base = pgm->Address<uintptr_t>();
        info.pgm_hash = hash;
-       programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
+       programs[i] =
+           Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);

        // Compile IR to SPIR-V
        auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
@@ -308,7 +310,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
        Shader::Info info =
            MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
        info.pgm_base = cs_pgm.Address<uintptr_t>();
-       auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
+       auto program =
+           Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);

        // Compile IR to SPIR-V
        u32 binding{};
@@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
    : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
      liverpool{liverpool_}, memory{Core::Memory::Instance()},
      pipeline_cache{instance, scheduler, liverpool},
-     vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
+     vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
    if (!Config::nullGpu()) {
        liverpool->BindRasterizer(this);
    }
@@ -128,6 +128,7 @@ void Rasterizer::BeginRendering() {
        state.height = std::min<u32>(state.height, image.info.size.height);

        const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
        state.color_images[state.num_color_attachments] = image.image;
        state.color_attachments[state.num_color_attachments++] = {
            .imageView = *image_view.image_view,
            .imageLayout = vk::ImageLayout::eGeneral,
@@ -152,6 +153,7 @@ void Rasterizer::BeginRendering() {
        const auto& image = texture_cache.GetImage(image_view.image_id);
        state.width = std::min<u32>(state.width, image.info.size.width);
        state.height = std::min<u32>(state.height, image.info.size.height);
        state.depth_image = image.image;
        state.depth_attachment = {
            .imageView = *image_view.image_view,
            .imageLayout = image.layout,
@@ -50,7 +50,32 @@ void Scheduler::EndRendering() {
        return;
    }
    is_rendering = false;
+   boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
+   for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
+       barriers.push_back(vk::ImageMemoryBarrier{
+           .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
+           .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
+           .oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
+           .newLayout = vk::ImageLayout::eColorAttachmentOptimal,
+           .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+           .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+           .image = render_state.color_images[i],
+           .subresourceRange =
+               {
+                   .aspectMask = vk::ImageAspectFlagBits::eColor,
+                   .baseMipLevel = 0,
+                   .levelCount = VK_REMAINING_MIP_LEVELS,
+                   .baseArrayLayer = 0,
+                   .layerCount = VK_REMAINING_ARRAY_LAYERS,
+               },
+       });
+   }
    current_cmdbuf.endRendering();
+   if (!barriers.empty()) {
+       current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
+                                      vk::PipelineStageFlagBits::eFragmentShader,
+                                      vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
+   }
}

void Scheduler::Flush(SubmitInfo& info) {
@@ -15,7 +15,9 @@ class Instance;

struct RenderState {
    std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
    std::array<vk::Image, 8> color_images{};
    vk::RenderingAttachmentInfo depth_attachment{};
    vk::Image depth_image{};
    u32 num_color_attachments{};
    u32 num_depth_attachments{};
    u32 width = std::numeric_limits<u32>::max();