Mirror of https://github.com/shadps4-emu/shadPS4.git
shader_recompiler: Better branch detection + more opcodes
parent f624f7749c
commit 02a50265f8
31 changed files with 772 additions and 120 deletions
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/io_file.h"
#include "common/thread.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
@@ -374,10 +374,16 @@ struct Liverpool {
        FrontAndBack = 3,
    };

    enum class FrontFace : u32 {
        CounterClockwise = 0,
        Clockwise = 1,
    };

    union PolygonControl {
        u32 raw;
        BitField<0, 1, u32> cull_front;
        BitField<1, 1, u32> cull_back;
        BitField<2, 1, FrontFace> front_face;
        BitField<3, 2, u32> enable_polygon_mode;
        BitField<5, 3, PolygonMode> polygon_mode_front;
        BitField<8, 3, PolygonMode> polygon_mode_back;
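For readers unfamiliar with the BitField helper, the union above packs several rasterizer controls into one 32-bit register. A standalone shift/mask decode of the same bit layout (the raw register value below is made up for illustration):

#include <cstdint>
#include <cstdio>

// Shift/mask decode matching the BitField declarations above.
int main() {
    const std::uint32_t raw = 0x00000125u;                      // hypothetical register value
    const std::uint32_t cull_front = (raw >> 0) & 0x1;          // BitField<0, 1>
    const std::uint32_t cull_back = (raw >> 1) & 0x1;           // BitField<1, 1>
    const std::uint32_t front_face = (raw >> 2) & 0x1;          // 0 = CounterClockwise, 1 = Clockwise
    const std::uint32_t enable_polygon_mode = (raw >> 3) & 0x3; // BitField<3, 2>
    const std::uint32_t polygon_mode_front = (raw >> 5) & 0x7;  // BitField<5, 3>
    const std::uint32_t polygon_mode_back = (raw >> 8) & 0x7;   // BitField<8, 3>
    std::printf("cull_front=%u cull_back=%u front_face=%u mode=%u front=%u back=%u\n",
                cull_front, cull_back, front_face, enable_polygon_mode, polygon_mode_front,
                polygon_mode_back);
}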
@@ -110,11 +110,29 @@ struct Image {
        BitField<59, 1, u64> atc;
        BitField<60, 4, ImageType> type;
    };
    union {
        BitField<0, 13, u64> depth;
        BitField<13, 14, u64> pitch;
        BitField<32, 13, u64> base_array;
        BitField<45, 13, u64> last_array;
    };

    VAddr Address() const {
        return base_address << 8;
    }

    u32 Pitch() const {
        return pitch;
    }

    u32 NumLayers() const {
        return last_array - base_array + 1;
    }

    u32 NumLevels() const {
        return last_level + 1;
    }

    DataFormat GetDataFmt() const noexcept {
        return static_cast<DataFormat>(data_format.Value());
    }
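The new accessors derive view dimensions straight from the descriptor bitfields. A standalone decode of that dword showing the same arithmetic (the raw value is made up for illustration):

#include <cstdint>
#include <cstdio>

// Decode of the descriptor dword added above, plus the values Pitch() and NumLayers() derive.
int main() {
    const std::uint64_t dword = (std::uint64_t{5} << 45) |   // last_array
                                (std::uint64_t{2} << 32) |   // base_array
                                (std::uint64_t{1024} << 13); // pitch
    const std::uint32_t pitch = (dword >> 13) & 0x3FFF;      // BitField<13, 14>
    const std::uint32_t base_array = (dword >> 32) & 0x1FFF; // BitField<32, 13>
    const std::uint32_t last_array = (dword >> 45) & 0x1FFF; // BitField<45, 13>
    const std::uint32_t num_layers = last_array - base_array + 1; // NumLayers()
    std::printf("pitch=%u layers=%u\n", pitch, num_layers);
}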
@ -287,7 +287,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Srgb) {
|
||||
return vk::Format::eR8G8B8A8Srgb;
|
||||
return vk::Format::eB8G8R8A8Srgb;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
|
||||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
|
@@ -304,6 +304,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
    if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Unorm) {
        return vk::Format::eR8Unorm;
    }
    if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) {
        return vk::Format::eBc3SrgbBlock;
    }
    UNREACHABLE();
}
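SurfaceFormat grows one if-chain entry per (data format, number format) pair. A table-driven equivalent, sketched only for the pairs visible in these hunks and with stand-in enums rather than the emulator's real headers:

#include <optional>
#include <vulkan/vulkan.hpp>

namespace AmdGpu { enum class DataFormat { Format8, Format8_8_8_8, FormatBc3 };
                   enum class NumberFormat { Unorm, Srgb, Float }; } // stand-ins for the real enums

std::optional<vk::Format> LookupSurfaceFormat(AmdGpu::DataFormat df, AmdGpu::NumberFormat nf) {
    using DF = AmdGpu::DataFormat;
    using NF = AmdGpu::NumberFormat;
    struct Entry { DF df; NF nf; vk::Format fmt; };
    static constexpr Entry table[] = {
        {DF::Format8_8_8_8, NF::Srgb, vk::Format::eB8G8R8A8Srgb},
        {DF::Format8, NF::Unorm, vk::Format::eR8Unorm},
        {DF::FormatBc3, NF::Srgb, vk::Format::eBc3SrgbBlock},
    };
    for (const auto& e : table) {
        if (e.df == df && e.nf == nf) {
            return e.fmt;
        }
    }
    return std::nullopt; // caller decides how to report the missing pair
}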
@ -75,8 +75,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.depthClampEnable = false,
|
||||
.rasterizerDiscardEnable = false,
|
||||
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
|
||||
.cullMode = LiverpoolToVK::CullMode(key.cull_mode),
|
||||
.frontFace = vk::FrontFace::eClockwise,
|
||||
.cullMode = vk::CullModeFlagBits::eNone, /*LiverpoolToVK::CullMode(key.cull_mode),*/
|
||||
.frontFace = key.front_face == Liverpool::FrontFace::Clockwise
|
||||
? vk::FrontFace::eClockwise
|
||||
: vk::FrontFace::eCounterClockwise,
|
||||
.depthBiasEnable = false,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
|
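The front-face selection is done inline above; an equivalent helper in the LiverpoolToVK style would be a one-liner. The function name below is hypothetical and only relies on the Liverpool::FrontFace enum added in this commit:

vk::FrontFace FrontFace(Liverpool::FrontFace face) {
    return face == Liverpool::FrontFace::Clockwise ? vk::FrontFace::eClockwise
                                                   : vk::FrontFace::eCounterClockwise;
}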
@@ -177,14 +179,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
    std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
    for (u32 i = 0; i < num_color_formats; i++) {
        const auto& control = key.blend_controls[i];
        const auto src_color = LiverpoolToVK::BlendFactor(control.color_src_factor);
        const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
        const auto color_blend = LiverpoolToVK::BlendOp(control.color_func);
        attachments[i] = vk::PipelineColorBlendAttachmentState{
            .blendEnable = key.blend_controls[i].enable,
            .srcColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_src_factor),
            .dstColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor),
            .colorBlendOp = LiverpoolToVK::BlendOp(control.color_func),
            .srcAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.alpha_src_factor),
            .dstAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor),
            .alphaBlendOp = LiverpoolToVK::BlendOp(control.alpha_func),
            .srcColorBlendFactor = src_color,
            .dstColorBlendFactor = dst_color,
            .colorBlendOp = color_blend,
            .srcAlphaBlendFactor = control.separate_alpha_blend
                                       ? LiverpoolToVK::BlendFactor(control.alpha_src_factor)
                                       : src_color,
            .dstAlphaBlendFactor = control.separate_alpha_blend
                                       ? LiverpoolToVK::BlendFactor(control.alpha_dst_factor)
                                       : dst_color,
            .alphaBlendOp = control.separate_alpha_blend
                                ? LiverpoolToVK::BlendOp(control.alpha_func)
                                : color_blend,
            .colorWriteMask =
                instance.IsColorWriteEnableSupported()
                    ? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
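When separate_alpha_blend is not set, the new code reuses the colour factors and op for the alpha channel. A self-contained illustration of the resulting state for one common case (the factor choices are illustrative, not taken from a real title):

#define VULKAN_HPP_NO_CONSTRUCTORS // Vulkan-Hpp: allow designated initializers on vk structs
#include <vulkan/vulkan.hpp>

// Straight alpha blending with separate_alpha_blend == false: alpha reuses the colour factors.
vk::PipelineColorBlendAttachmentState MakeStraightAlphaBlend() {
    const auto src = vk::BlendFactor::eSrcAlpha;
    const auto dst = vk::BlendFactor::eOneMinusSrcAlpha;
    const auto op = vk::BlendOp::eAdd;
    return vk::PipelineColorBlendAttachmentState{
        .blendEnable = true,
        .srcColorBlendFactor = src,
        .dstColorBlendFactor = dst,
        .colorBlendOp = op,
        .srcAlphaBlendFactor = src, // reused colour factor
        .dstAlphaBlendFactor = dst, // reused colour factor
        .alphaBlendOp = op,         // reused colour op
        .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
                          vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
    };
}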
@@ -38,6 +38,8 @@ struct GraphicsPipelineKey {
    Liverpool::PrimitiveType prim_type;
    Liverpool::PolygonMode polygon_mode;
    Liverpool::CullMode cull_mode;
    Liverpool::FrontFace front_face;
    u32 pad{};
    std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
    std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
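The extra u32 pad{} matters if the key is hashed and compared over its raw bytes, which is a common approach for pipeline keys (an assumption here, not shown in this diff): a named, value-initialized member keeps every hashed byte deterministic, where implicit padding would not be guaranteed to be zero. A toy illustration:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <string_view>

// Toy key hashed over its raw bytes; the explicit pad member stands in for implicit padding.
struct ToyKey {
    std::uint32_t cull_mode{};
    std::uint32_t front_face{};
    std::uint32_t pad{};
};

std::size_t HashBytes(const ToyKey& key) {
    const std::string_view bytes{reinterpret_cast<const char*>(&key), sizeof(key)};
    return std::hash<std::string_view>{}(bytes);
}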
@@ -207,6 +207,7 @@ bool Instance::CreateDevice() {
            .shaderDrawParameters = true,
        },
        vk::PhysicalDeviceVulkan12Features{
            .scalarBlockLayout = true,
            .timelineSemaphore = true,
        },
        vk::PhysicalDeviceVulkan13Features{
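scalarBlockLayout is a Vulkan 1.2 feature that not every driver exposes, so enabling it unconditionally can fail device creation. A sketch of querying it first (standard Vulkan-Hpp, not code from this commit):

#include <vulkan/vulkan.hpp>

// Ask the driver whether VkPhysicalDeviceVulkan12Features::scalarBlockLayout is available.
bool SupportsScalarBlockLayout(vk::PhysicalDevice physical_device) {
    const auto chain =
        physical_device.getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan12Features>();
    return chain.get<vk::PhysicalDeviceVulkan12Features>().scalarBlockLayout;
}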
@@ -94,6 +94,7 @@ void PipelineCache::RefreshGraphicsKey() {
    key.prim_type = regs.primitive_type;
    key.polygon_mode = regs.polygon_control.PolyMode();
    key.cull_mode = regs.polygon_control.CullingMode();
    key.front_face = regs.polygon_control.front_face;

    const auto& db = regs.depth_buffer;
    key.depth_format = key.depth.depth_enable
@@ -163,10 +164,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
        programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

        // Compile IR to SPIR-V
        const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
        auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
        stages[i] = CompileSPV(spv_code, instance.GetDevice());
        infos[i] = &programs[i].info;

        // Set module name to hash in renderdoc
        const auto name = fmt::format("{}_{:#x}", stage, hash);
        const vk::DebugUtilsObjectNameInfoEXT name_info = {
            .objectType = vk::ObjectType::eShaderModule,
            .objectHandle = std::bit_cast<u64>(stages[i]),
            .pObjectName = name.c_str(),
        };
        instance.GetDevice().setDebugUtilsObjectNameEXT(name_info);

        if (Config::dumpShaders()) {
            DumpShader(spv_code, hash, stage, "spv");
        }
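CompileSPV is an emulator helper; with Vulkan-Hpp it essentially reduces to creating a shader module from the emitted words. A sketch under that assumption (the function name below is illustrative, not the emulator's):

#include <cstdint>
#include <span>
#include <vulkan/vulkan.hpp>

// Wrap emitted SPIR-V words in a vk::ShaderModule. codeSize is in bytes, not words.
vk::ShaderModule CreateShaderModule(std::span<const std::uint32_t> code, vk::Device device) {
    vk::ShaderModuleCreateInfo info{};
    info.codeSize = code.size() * sizeof(std::uint32_t);
    info.pCode = code.data();
    return device.createShaderModule(info);
}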
@@ -85,6 +85,7 @@ void Rasterizer::Draw(bool is_indexed) {
}

void Rasterizer::DispatchDirect() {
    compute_done = true;
    return;
    const auto cmdbuf = scheduler.CommandBuffer();
    const auto& cs_program = liverpool->regs.cs_program;
@@ -49,6 +49,7 @@ private:
    Core::MemoryManager* memory;
    PipelineCache pipeline_cache;
    StreamBuffer vertex_index_buffer;
    bool compute_done{};
};

} // namespace Vulkan
@@ -39,7 +39,9 @@ using Libraries::VideoOut::TilingMode;
    if (false /*&& IsDepthStencilFormat(format)*/) {
        usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
    } else {
        usage |= vk::ImageUsageFlagBits::eColorAttachment;
        if (format != vk::Format::eBc3SrgbBlock) {
            usage |= vk::ImageUsageFlagBits::eColorAttachment;
        }
    }
    return usage;
}
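Excluding eBc3SrgbBlock by name works for this case; an alternative sketch (not what the commit does) asks the driver which usages the format actually supports:

#include <vulkan/vulkan.hpp>

// Grant eColorAttachment only when the driver reports the format as renderable;
// block-compressed formats such as BC3 will simply lack the feature bit.
vk::ImageUsageFlags ColorUsageFor(vk::PhysicalDevice physical_device, vk::Format format) {
    const vk::FormatProperties props = physical_device.getFormatProperties(format);
    vk::ImageUsageFlags usage{};
    if (props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eColorAttachment) {
        usage |= vk::ImageUsageFlagBits::eColorAttachment;
    }
    return usage;
}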
@@ -101,8 +103,10 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
    size.width = image.width + 1;
    size.height = image.height + 1;
    size.depth = 1;
    pitch = image.Pitch();
    resources.levels = image.NumLevels();
    resources.layers = image.NumLayers();
    // TODO: Derive this properly from tiling params
    pitch = size.width;
    guest_size_bytes = size.width * size.height * 4;
}
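Per the TODO, guest_size_bytes above still covers only a 32-bit mip 0. A sketch of the obvious full-chain estimate under the same simplifying assumptions (linear layout, no tiling or row alignment, fixed bytes per texel):

#include <algorithm>
#include <cstdint>

// Sum (w >> m) * (h >> m) * bytes_per_texel across the mip chain, times the layer count.
std::uint64_t GuestSizeEstimate(std::uint32_t width, std::uint32_t height, std::uint32_t levels,
                                std::uint32_t layers, std::uint32_t bytes_per_texel) {
    std::uint64_t total = 0;
    for (std::uint32_t m = 0; m < levels; ++m) {
        const std::uint64_t w = std::max(width >> m, 1u);
        const std::uint64_t h = std::max(height >> m, 1u);
        total += w * h * bytes_per_texel;
    }
    return total * layers;
}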
@@ -183,7 +187,7 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
        .subresourceRange{
            .aspectMask = aspect_mask,
            .baseMipLevel = 0,
            .levelCount = 1,
            .levelCount = VK_REMAINING_MIP_LEVELS,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        }};
@@ -14,8 +14,9 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) {
    case AmdGpu::ImageType::Color1DArray:
        return vk::ImageViewType::e1DArray;
    case AmdGpu::ImageType::Color2D:
    case AmdGpu::ImageType::Cube:
        return vk::ImageViewType::e2D;
    case AmdGpu::ImageType::Cube:
        return vk::ImageViewType::eCube;
    case AmdGpu::ImageType::Color2DArray:
        return vk::ImageViewType::e2DArray;
    case AmdGpu::ImageType::Color3D:
@@ -47,10 +48,10 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
    type = ConvertImageViewType(image.type);
    format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
    range.base.level = image.base_level;
    range.base.level = 0;
    range.base.layer = 0;
    range.extent.levels = 1;
    range.extent.layers = 1;
    range.extent.levels = image.NumLevels();
    range.extent.layers = image.NumLayers();
    mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
    mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
    mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
@@ -175,48 +175,94 @@ void TextureCache::RefreshImage(Image& image) {
    // Mark image as validated.
    image.flags &= ~ImageFlagBits::CpuModified;

    // Upload data to the staging buffer.
    const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
    const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
    if (image.info.is_tiled) {
        ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
                            Config::isNeoMode());
    } else {
        std::memcpy(data, image_data, image.info.guest_size_bytes);
    }
    staging.Commit(image.info.guest_size_bytes);
    {

    // Copy to the image.
    const vk::BufferImageCopy image_copy = {
        .bufferOffset = offset,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource{
        // Upload data to the staging buffer.
        const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
        const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
        if (image.info.is_tiled) {
            ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
                                Config::isNeoMode());
        } else {
            std::memcpy(data, image_data, image.info.guest_size_bytes);
        }
        staging.Commit(image.info.guest_size_bytes);

        // Copy to the image.
        const vk::BufferImageCopy image_copy = {
            .bufferOffset = offset,
            .bufferRowLength = 0,
            .bufferImageHeight = 0,
            .imageSubresource{
                .aspectMask = vk::ImageAspectFlagBits::eColor,
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
            .imageOffset = {0, 0, 0},
            .imageExtent = {image.info.size.width, image.info.size.height, 1},
        };

        const auto cmdbuf = scheduler.CommandBuffer();
        const vk::ImageSubresourceRange range = {
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .mipLevel = 0,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
            },
            .imageOffset = {0, 0, 0},
            .imageExtent = {image.info.size.width, image.info.size.height, 1},
        };
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        };

        const auto cmdbuf = scheduler.CommandBuffer();
        const vk::ImageSubresourceRange range = {
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        };
        image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);

        image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
        cmdbuf.copyBufferToImage(staging.Handle(), image.image,
                                 vk::ImageLayout::eTransferDstOptimal, image_copy);

        cmdbuf.copyBufferToImage(staging.Handle(), image.image, vk::ImageLayout::eTransferDstOptimal,
                                 image_copy);
        image.Transit(vk::ImageLayout::eGeneral,
                      vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
        return;
    }

    image.Transit(vk::ImageLayout::eGeneral,
                  vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
    const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
    for (u32 l = 0; l < image.info.resources.layers; l++) {
        // Upload data to the staging buffer.
        for (u32 m = 0; m < image.info.resources.levels; m++) {
            const u32 width = image.info.size.width >> m;
            const u32 height = image.info.size.height >> m;
            const u32 map_size = width * height;
            const auto [data, offset, _] = staging.Map(map_size, 16);
            if (image.info.is_tiled) {
                ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
            } else {
                std::memcpy(data, image_data, map_size);
            }
            staging.Commit(map_size);
            image_data += map_size;

            // Copy to the image.
            const vk::BufferImageCopy image_copy = {
                .bufferOffset = offset,
                .bufferRowLength = 0,
                .bufferImageHeight = 0,
                .imageSubresource{
                    .aspectMask = vk::ImageAspectFlagBits::eColor,
                    .mipLevel = m,
                    .baseArrayLayer = l,
                    .layerCount = 1,
                },
                .imageOffset = {0, 0, 0},
                .imageExtent = {width, height, 1},
            };

            const auto cmdbuf = scheduler.CommandBuffer();
            image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);

            cmdbuf.copyBufferToImage(staging.Handle(), image.image,
                                     vk::ImageLayout::eTransferDstOptimal, image_copy);

            image.Transit(vk::ImageLayout::eGeneral,
                          vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
        }
    }
}

vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
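The new per-mip loop records one copyBufferToImage per level. Vulkan also accepts several regions in a single call, so one possible follow-up (a sketch only, with made-up offset bookkeeping that mirrors the loop's map_size = width * height) is to stage every level first and batch the regions:

#define VULKAN_HPP_NO_CONSTRUCTORS // Vulkan-Hpp: allow designated initializers on vk structs
#include <algorithm>
#include <cstdint>
#include <vector>
#include <vulkan/vulkan.hpp>

// One BufferImageCopy per mip of a single layer, assuming each level was written to the
// staging buffer back to back starting at base_offset, one byte per texel as in the loop above.
std::vector<vk::BufferImageCopy> BuildMipCopies(std::uint32_t width, std::uint32_t height,
                                                std::uint32_t levels, vk::DeviceSize base_offset) {
    std::vector<vk::BufferImageCopy> regions;
    vk::DeviceSize offset = base_offset;
    for (std::uint32_t m = 0; m < levels; ++m) {
        const std::uint32_t w = std::max(width >> m, 1u);
        const std::uint32_t h = std::max(height >> m, 1u);
        regions.push_back(vk::BufferImageCopy{
            .bufferOffset = offset,
            .imageSubresource{.aspectMask = vk::ImageAspectFlagBits::eColor,
                              .mipLevel = m,
                              .baseArrayLayer = 0,
                              .layerCount = 1},
            .imageExtent = {w, h, 1},
        });
        offset += vk::DeviceSize{w} * h;
    }
    return regions; // hand the whole vector to a single cmdbuf.copyBufferToImage call
}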