shader_recompiler: Better branch detection + more opcodes

raphaelthegreat 2024-06-01 20:25:31 +03:00
parent f624f7749c
commit 02a50265f8
31 changed files with 772 additions and 120 deletions

View file

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/io_file.h"
#include "common/thread.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"

View file

@@ -374,10 +374,16 @@ struct Liverpool {
FrontAndBack = 3,
};
enum class FrontFace : u32 {
CounterClockwise = 0,
Clockwise = 1,
};
union PolygonControl {
u32 raw;
BitField<0, 1, u32> cull_front;
BitField<1, 1, u32> cull_back;
BitField<2, 1, FrontFace> front_face;
BitField<3, 2, u32> enable_polygon_mode;
BitField<5, 3, PolygonMode> polygon_mode_front;
BitField<8, 3, PolygonMode> polygon_mode_back;
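Aside (not part of this commit): RefreshGraphicsKey() later in this diff reads regs.polygon_control.CullingMode() and regs.polygon_control.PolyMode(). A minimal sketch of how such accessors could be derived from the bit fields above, assuming the CullMode enum ends with FrontAndBack = 3 as shown earlier and that PolygonMode names its filled mode Fill (an assumption):

    CullMode CullingMode() const {
        // Bit 0 culls front faces, bit 1 culls back faces, so both set maps to FrontAndBack.
        return static_cast<CullMode>(cull_front | (cull_back << 1));
    }

    PolygonMode PolyMode() const {
        // The explicit polygon mode only applies when enable_polygon_mode is set.
        return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill;
    }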

View file

@@ -110,11 +110,29 @@ struct Image {
BitField<59, 1, u64> atc;
BitField<60, 4, ImageType> type;
};
union {
BitField<0, 13, u64> depth;
BitField<13, 14, u64> pitch;
BitField<32, 13, u64> base_array;
BitField<45, 13, u64> last_array;
};
VAddr Address() const {
return base_address << 8;
}
u32 Pitch() const {
return pitch;
}
u32 NumLayers() const {
return last_array - base_array + 1;
}
u32 NumLevels() const {
return last_level + 1;
}
DataFormat GetDataFmt() const noexcept {
return static_cast<DataFormat>(data_format.Value());
}
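Aside (not part of this commit): a short worked example of what the new helpers return, using illustrative field values only:

    //   base_address = 0x1234           ->  Address()   == 0x1234 << 8 == 0x123400 (256-byte granularity)
    //   base_array = 0, last_array = 5  ->  NumLayers() == 5 - 0 + 1  == 6
    //   last_level = 3                  ->  NumLevels() == 3 + 1      == 4 (base level plus three mips)
    //   pitch = 512                     ->  Pitch()     == 512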

View file

@@ -287,7 +287,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb;
return vk::Format::eB8G8R8A8Srgb;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
@@ -304,6 +304,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8Unorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc3SrgbBlock;
}
UNREACHABLE();
}
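Aside (not part of this commit): a small usage sketch of the new BC3 sRGB mapping added above, for a caller that already knows the descriptor's formats:

    // BC3 (DXT5) textures flagged as sRGB now resolve instead of hitting UNREACHABLE().
    const vk::Format fmt = Vulkan::LiverpoolToVK::SurfaceFormat(AmdGpu::DataFormat::FormatBc3,
                                                                AmdGpu::NumberFormat::Srgb);
    ASSERT(fmt == vk::Format::eBc3SrgbBlock);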

View file

@@ -75,8 +75,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
.cullMode = LiverpoolToVK::CullMode(key.cull_mode),
.frontFace = vk::FrontFace::eClockwise,
.cullMode = vk::CullModeFlagBits::eNone, /*LiverpoolToVK::CullMode(key.cull_mode),*/
.frontFace = key.front_face == Liverpool::FrontFace::Clockwise
? vk::FrontFace::eClockwise
: vk::FrontFace::eCounterClockwise,
.depthBiasEnable = false,
.lineWidth = 1.0f,
};
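Aside (not part of this commit): the front-face choice is inlined as a ternary above; if it were ever hoisted next to the other translation helpers, a sketch could look like this (LiverpoolToVK::FrontFace is hypothetical, not a function this commit adds):

    vk::FrontFace FrontFace(Liverpool::FrontFace face) {
        // Liverpool encodes 0 = counter-clockwise, 1 = clockwise (see the new FrontFace enum).
        return face == Liverpool::FrontFace::Clockwise ? vk::FrontFace::eClockwise
                                                       : vk::FrontFace::eCounterClockwise;
    }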
@@ -177,14 +179,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
for (u32 i = 0; i < num_color_formats; i++) {
const auto& control = key.blend_controls[i];
const auto src_color = LiverpoolToVK::BlendFactor(control.color_src_factor);
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
const auto color_blend = LiverpoolToVK::BlendOp(control.color_func);
attachments[i] = vk::PipelineColorBlendAttachmentState{
.blendEnable = key.blend_controls[i].enable,
.srcColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_src_factor),
.dstColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor),
.colorBlendOp = LiverpoolToVK::BlendOp(control.color_func),
.srcAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.alpha_src_factor),
.dstAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor),
.alphaBlendOp = LiverpoolToVK::BlendOp(control.alpha_func),
.srcColorBlendFactor = src_color,
.dstColorBlendFactor = dst_color,
.colorBlendOp = color_blend,
.srcAlphaBlendFactor = control.separate_alpha_blend
? LiverpoolToVK::BlendFactor(control.alpha_src_factor)
: src_color,
.dstAlphaBlendFactor = control.separate_alpha_blend
? LiverpoolToVK::BlendFactor(control.alpha_dst_factor)
: dst_color,
.alphaBlendOp = control.separate_alpha_blend
? LiverpoolToVK::BlendOp(control.alpha_func)
: color_blend,
.colorWriteMask =
instance.IsColorWriteEnableSupported()
? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |

View file

@@ -38,6 +38,8 @@ struct GraphicsPipelineKey {
Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode;
Liverpool::FrontFace front_face;
u32 pad{};
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
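Aside (not part of this commit): the explicit u32 pad keeps the key densely packed after the new front_face member, which matters if the key is hashed or compared as a flat byte range (a common pipeline-cache pattern, assumed here rather than shown in this hunk). A minimal sketch:

    // Requires <functional>, <string_view>, <type_traits>.
    static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>,
                  "padding or float members would make byte-wise hashing unreliable");

    std::size_t HashKey(const GraphicsPipelineKey& key) {
        const std::string_view bytes{reinterpret_cast<const char*>(&key), sizeof(key)};
        return std::hash<std::string_view>{}(bytes);
    }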

View file

@@ -207,6 +207,7 @@ bool Instance::CreateDevice() {
.shaderDrawParameters = true,
},
vk::PhysicalDeviceVulkan12Features{
.scalarBlockLayout = true,
.timelineSemaphore = true,
},
vk::PhysicalDeviceVulkan13Features{
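Aside (not part of this commit): scalarBlockLayout is core in Vulkan 1.2 but remains an optional feature, so a robust path would confirm device support before requesting it in the chain above. A sketch using the standard vulkan-hpp feature query (physical_device is an assumed variable name):

    const auto chain = physical_device
        .getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan12Features>();
    const auto& vk12 = chain.get<vk::PhysicalDeviceVulkan12Features>();
    if (!vk12.scalarBlockLayout) {
        // Without it, SPIR-V that lays out buffers with scalar packing would be rejected.
    }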

View file

@@ -94,6 +94,7 @@ void PipelineCache::RefreshGraphicsKey() {
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
key.cull_mode = regs.polygon_control.CullingMode();
key.front_face = regs.polygon_control.front_face;
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
@@ -163,10 +164,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info;
// Set module name to hash in renderdoc
const auto name = fmt::format("{}_{:#x}", stage, hash);
const vk::DebugUtilsObjectNameInfoEXT name_info = {
.objectType = vk::ObjectType::eShaderModule,
.objectHandle = std::bit_cast<u64>(stages[i]),
.pObjectName = name.c_str(),
};
instance.GetDevice().setDebugUtilsObjectNameEXT(name_info);
if (Config::dumpShaders()) {
DumpShader(spv_code, hash, stage, "spv");
}
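Aside (not part of this commit): the naming block above is written inline for shader modules; a generic helper (hypothetical, not in the codebase) could reuse the same VK_EXT_debug_utils call for any vulkan-hpp handle type:

    template <typename HandleType>
    void SetObjectName(vk::Device device, HandleType handle, const std::string& name) {
        const vk::DebugUtilsObjectNameInfoEXT name_info = {
            .objectType = HandleType::objectType,        // static member on vulkan-hpp handle wrappers
            .objectHandle = std::bit_cast<u64>(handle),  // raw 64-bit handle value
            .pObjectName = name.c_str(),
        };
        device.setDebugUtilsObjectNameEXT(name_info);
    }

    // Usage mirroring the inline code above:
    // SetObjectName(instance.GetDevice(), stages[i], fmt::format("{}_{:#x}", stage, hash));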

View file

@@ -85,6 +85,7 @@ void Rasterizer::Draw(bool is_indexed) {
}
void Rasterizer::DispatchDirect() {
compute_done = true;
return;
const auto cmdbuf = scheduler.CommandBuffer();
const auto& cs_program = liverpool->regs.cs_program;

View file

@@ -49,6 +49,7 @@ private:
Core::MemoryManager* memory;
PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer;
bool compute_done{};
};
} // namespace Vulkan

View file

@@ -39,7 +39,9 @@ using Libraries::VideoOut::TilingMode;
if (false /*&& IsDepthStencilFormat(format)*/) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
usage |= vk::ImageUsageFlagBits::eColorAttachment;
if (format != vk::Format::eBc3SrgbBlock) {
usage |= vk::ImageUsageFlagBits::eColorAttachment;
}
}
return usage;
}
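Aside (not part of this commit): block-compressed formats such as eBc3SrgbBlock generally do not expose color-attachment support, so requesting eColorAttachment usage for them would fail validation. A sketch of making that check format-driven instead of special-casing BC3 (illustrative only; physical_device is an assumed handle):

    const vk::FormatProperties props = physical_device.getFormatProperties(format);
    const bool renderable =
        static_cast<bool>(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eColorAttachment);
    if (renderable) {
        usage |= vk::ImageUsageFlagBits::eColorAttachment;
    }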
@@ -101,8 +103,10 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
size.width = image.width + 1;
size.height = image.height + 1;
size.depth = 1;
pitch = image.Pitch();
resources.levels = image.NumLevels();
resources.layers = image.NumLayers();
// TODO: Derive this properly from tiling params
pitch = size.width;
guest_size_bytes = size.width * size.height * 4;
}
@@ -183,7 +187,7 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = 1,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}};

View file

@@ -14,8 +14,9 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) {
case AmdGpu::ImageType::Color1DArray:
return vk::ImageViewType::e1DArray;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Cube:
return vk::ImageViewType::e2D;
case AmdGpu::ImageType::Cube:
return vk::ImageViewType::eCube;
case AmdGpu::ImageType::Color2DArray:
return vk::ImageViewType::e2DArray;
case AmdGpu::ImageType::Color3D:
@@ -47,10 +48,10 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
type = ConvertImageViewType(image.type);
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
range.base.level = image.base_level;
range.base.level = 0;
range.base.layer = 0;
range.extent.levels = 1;
range.extent.layers = 1;
range.extent.levels = image.NumLevels();
range.extent.layers = image.NumLayers();
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);

View file

@@ -175,48 +175,94 @@ void TextureCache::RefreshImage(Image& image) {
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
// Upload data to the staging buffer.
const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
if (image.info.is_tiled) {
ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
Config::isNeoMode());
} else {
std::memcpy(data, image_data, image.info.guest_size_bytes);
}
staging.Commit(image.info.guest_size_bytes);
{
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
// Upload data to the staging buffer.
const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
if (image.info.is_tiled) {
ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
Config::isNeoMode());
} else {
std::memcpy(data, image_data, image.info.guest_size_bytes);
}
staging.Commit(image.info.guest_size_bytes);
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {image.info.size.width, image.info.size.height, 1},
};
const auto cmdbuf = scheduler.CommandBuffer();
const vk::ImageSubresourceRange range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {image.info.size.width, image.info.size.height, 1},
};
.layerCount = VK_REMAINING_ARRAY_LAYERS,
};
const auto cmdbuf = scheduler.CommandBuffer();
const vk::ImageSubresourceRange range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
};
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
vk::ImageLayout::eTransferDstOptimal, image_copy);
cmdbuf.copyBufferToImage(staging.Handle(), image.image, vk::ImageLayout::eTransferDstOptimal,
image_copy);
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
return;
}
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
for (u32 l = 0; l < image.info.resources.layers; l++) {
// Upload data to the staging buffer.
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = image.info.size.width >> m;
const u32 height = image.info.size.height >> m;
const u32 map_size = width * height;
const auto [data, offset, _] = staging.Map(map_size, 16);
if (image.info.is_tiled) {
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
} else {
std::memcpy(data, image_data, map_size);
}
staging.Commit(map_size);
image_data += map_size;
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = m,
.baseArrayLayer = l,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, 1},
};
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
vk::ImageLayout::eTransferDstOptimal, image_copy);
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
}
}
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
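Aside on the new RefreshImage() upload loop above (not part of this commit): a worked example of how the loop walks the mip chain, derived directly from the code shown:

    // Illustrative: an image with size 256x256, resources.levels == 3, resources.layers == 2.
    //   m = 0: width = 256 >> 0 = 256, height = 256
    //   m = 1: width = 256 >> 1 = 128, height = 128
    //   m = 2: width = 256 >> 2 =  64, height =  64
    // Each inner iteration maps a staging slice, advances image_data past that level,
    // and records one copyBufferToImage targeting { mipLevel = m, baseArrayLayer = l, layerCount = 1 },
    // so the two layers produce six copies in total.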