mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-29 23:03:18 +00:00
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implement f32 to f16 convert * shader_recompiler: Add bit instructions * shader_recompiler: More data share instructions * shader_recompiler: Remove exec contexts, fix S_MOV_B64 * shader_recompiler: Split instruction parsing into categories * shader_recompiler: Better BFS search * shader_recompiler: Constant propagation pass for cmp_class_f32 * shader_recompiler: Partial readfirstlane implementation * shader_recompiler: Stub readlane/writelane only for non-compute * hack: Fix swizzle on RDR * Will properly fix this when merging this * clang format * address_space: Bump user area size to full * shader_recompiler: V_INTERP_MOV_F32 * Should work the same as spirv will emit flat decoration on demand * kernel: Add MAP_OP_MAP_FLEXIBLE * image_view: Attempt to apply storage swizzle on format * vk_scheduler: Barrier attachments on renderpass end * clang format * liverpool: cs state backup * shader_recompiler: More instructions and formats * vector_alu: Proper V_MBCNT_U32_B32 * shader_recompiler: Port some dark souls things * file_system: Implement sceKernelRename * more formats * clang format * resource_tracking_pass: Back to assert * translate: Tracedata * kernel: Remove tracy lock * Solves random crashes in Dark Souls * code: Review comments
This commit is contained in:
parent
ac6dc20c3b
commit
a7c9bfa5c5
66 changed files with 1349 additions and 904 deletions
|
@ -403,9 +403,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -506,9 +508,11 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[vqid].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
regs.cs_program = mapped_queues[vqid].cs_state;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -529,7 +533,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
}
|
||||
|
||||
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
static constexpr u32 GfxQueueId = 0u;
|
||||
auto& queue = mapped_queues[GfxQueueId];
|
||||
|
||||
auto task = ProcessGraphics(dcb, ccb);
|
||||
|
|
|
@ -36,6 +36,7 @@ namespace AmdGpu {
|
|||
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
||||
|
||||
struct Liverpool {
|
||||
static constexpr u32 GfxQueueId = 0u;
|
||||
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
||||
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
||||
static constexpr u32 NumQueuesPerPipe = 8u;
|
||||
|
@ -1061,6 +1062,7 @@ private:
|
|||
// Per-queue bookkeeping; one instance exists for every slot of mapped_queues.
struct GpuQueue {
|
||||
// Mutex belonging to this queue. NOTE(review): presumably guards `submits`; confirm against
// the submit/processing code, which is not visible here.
std::mutex m_access{};
|
||||
// Task handles stored for this queue.
std::queue<Task::Handle> submits{};
|
||||
// Copy of regs.cs_program for this queue. The command processing tasks store the register
// block here before yielding inside their WaitRegMem loops and write it back to
// regs.cs_program once they are resumed.
ComputeProgram cs_state{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
|
||||
|
|
|
@ -7,6 +7,77 @@
|
|||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// Returns the enumerator name of a DataFormat value as text (used when formatting the enum
/// for output). A value that is not present in the table below reaches UNREACHABLE().
std::string_view NameOf(DataFormat fmt) {
    struct NamedFormat {
        DataFormat format;
        std::string_view name;
    };
    // One entry per supported enumerator; the text always matches the enumerator spelling.
    static constexpr NamedFormat known_formats[] = {
        {DataFormat::FormatInvalid, "FormatInvalid"},
        {DataFormat::Format8, "Format8"},
        {DataFormat::Format16, "Format16"},
        {DataFormat::Format8_8, "Format8_8"},
        {DataFormat::Format32, "Format32"},
        {DataFormat::Format16_16, "Format16_16"},
        {DataFormat::Format10_11_11, "Format10_11_11"},
        {DataFormat::Format11_11_10, "Format11_11_10"},
        {DataFormat::Format10_10_10_2, "Format10_10_10_2"},
        {DataFormat::Format2_10_10_10, "Format2_10_10_10"},
        {DataFormat::Format8_8_8_8, "Format8_8_8_8"},
        {DataFormat::Format32_32, "Format32_32"},
        {DataFormat::Format16_16_16_16, "Format16_16_16_16"},
        {DataFormat::Format32_32_32, "Format32_32_32"},
        {DataFormat::Format32_32_32_32, "Format32_32_32_32"},
        {DataFormat::Format5_6_5, "Format5_6_5"},
        {DataFormat::Format1_5_5_5, "Format1_5_5_5"},
        {DataFormat::Format5_5_5_1, "Format5_5_5_1"},
        {DataFormat::Format4_4_4_4, "Format4_4_4_4"},
        {DataFormat::Format8_24, "Format8_24"},
        {DataFormat::Format24_8, "Format24_8"},
        {DataFormat::FormatX24_8_32, "FormatX24_8_32"},
        {DataFormat::FormatGB_GR, "FormatGB_GR"},
        {DataFormat::FormatBG_RG, "FormatBG_RG"},
        {DataFormat::Format5_9_9_9, "Format5_9_9_9"},
        {DataFormat::FormatBc1, "FormatBc1"},
        {DataFormat::FormatBc2, "FormatBc2"},
        {DataFormat::FormatBc3, "FormatBc3"},
        {DataFormat::FormatBc4, "FormatBc4"},
        {DataFormat::FormatBc5, "FormatBc5"},
        {DataFormat::FormatBc6, "FormatBc6"},
        {DataFormat::FormatBc7, "FormatBc7"},
    };
    for (const NamedFormat& entry : known_formats) {
        if (entry.format == fmt) {
            return entry.name;
        }
    }
    // No table entry matched: same outcome as an unhandled value in a switch.
    UNREACHABLE();
}
|
||||
|
||||
std::string_view NameOf(NumberFormat fmt) {
|
||||
switch (fmt) {
|
||||
case NumberFormat::Unorm:
|
||||
|
|
|
@ -61,6 +61,7 @@ enum class NumberFormat : u32 {
|
|||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
||||
|
||||
int NumComponents(DataFormat format);
|
||||
|
@ -70,6 +71,16 @@ s32 ComponentOffset(DataFormat format, u32 comp);
|
|||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::DataFormat> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(AmdGpu::DataFormat fmt, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::NumberFormat> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
|
|
|
@ -75,7 +75,7 @@ struct Buffer {
|
|||
static_assert(sizeof(Buffer) == 16); // 128bits
|
||||
|
||||
enum class ImageType : u64 {
|
||||
Buffer = 0,
|
||||
Invalid = 0,
|
||||
Color1D = 8,
|
||||
Color2D = 9,
|
||||
Color3D = 10,
|
||||
|
@ -88,8 +88,8 @@ enum class ImageType : u64 {
|
|||
|
||||
constexpr std::string_view NameOf(ImageType type) {
|
||||
switch (type) {
|
||||
case ImageType::Buffer:
|
||||
return "Buffer";
|
||||
case ImageType::Invalid:
|
||||
return "Invalid";
|
||||
case ImageType::Color1D:
|
||||
return "Color1D";
|
||||
case ImageType::Color2D:
|
||||
|
@ -179,6 +179,40 @@ struct Image {
|
|||
return base_address << 8;
|
||||
}
|
||||
|
||||
/// Packs the four destination selectors into a single value, 3 bits apart:
/// x at bit 0, y at bit 3, z at bit 6 and w at bit 9.
u32 DstSelect() const {
    return (dst_sel_w << 9) | (dst_sel_z << 6) | (dst_sel_y << 3) | dst_sel_x;
}
|
||||
|
||||
/// Maps one destination selector value to the character used by DstSelectName():
/// 0 -> '0', 1 -> '1', 4..7 -> 'R', 'G', 'B', 'A'. Any other value reaches UNREACHABLE().
static char SelectComp(u32 sel) {
    if (sel == 0) {
        return '0';
    }
    if (sel == 1) {
        return '1';
    }
    if (sel >= 4 && sel <= 7) {
        // Selector values 4..7 index this table in order.
        constexpr char channel_names[] = {'R', 'G', 'B', 'A'};
        return channel_names[sel - 4];
    }
    UNREACHABLE();
}
|
||||
|
||||
std::string DstSelectName() const {
|
||||
std::string result = "[";
|
||||
u32 dst_sel = DstSelect();
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
result += SelectComp(dst_sel & 7);
|
||||
dst_sel >>= 3;
|
||||
}
|
||||
result += ']';
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns the value of the `pitch` field plus one.
u32 Pitch() const {
    return 1 + pitch;
}
|
||||
|
@ -290,6 +324,7 @@ enum class BorderColor : u64 {
|
|||
// Table 8.12 Sampler Resource Definition
|
||||
struct Sampler {
|
||||
union {
|
||||
u64 raw0;
|
||||
BitField<0, 3, ClampMode> clamp_x;
|
||||
BitField<3, 3, ClampMode> clamp_y;
|
||||
BitField<6, 3, ClampMode> clamp_z;
|
||||
|
@ -309,6 +344,7 @@ struct Sampler {
|
|||
BitField<60, 4, u64> perf_z;
|
||||
};
|
||||
union {
|
||||
u64 raw1;
|
||||
BitField<0, 14, u64> lod_bias;
|
||||
BitField<14, 6, u64> lod_bias_sec;
|
||||
BitField<20, 2, Filter> xy_mag_filter;
|
||||
|
@ -323,6 +359,10 @@ struct Sampler {
|
|||
BitField<62, 2, BorderColor> border_color_type;
|
||||
};
|
||||
|
||||
/// A sampler converts to true when either of its two raw 64-bit words is non-zero,
/// and to false only when both words are zero.
operator bool() const noexcept {
    return !(raw0 == 0 && raw1 == 0);
}
|
||||
|
||||
float LodBias() const noexcept {
|
||||
return static_cast<float>(static_cast<int16_t>((lod_bias.Value() ^ 0x2000u) - 0x2000u)) /
|
||||
256.0f;
|
||||
|
|
|
@ -297,6 +297,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eBc3UnormBlock,
|
||||
vk::Format::eBc4UnormBlock,
|
||||
vk::Format::eBc5UnormBlock,
|
||||
vk::Format::eBc5SnormBlock,
|
||||
vk::Format::eBc7SrgbBlock,
|
||||
vk::Format::eBc7UnormBlock,
|
||||
vk::Format::eD16Unorm,
|
||||
|
@ -308,6 +309,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eR8G8B8A8Srgb,
|
||||
vk::Format::eR8G8B8A8Uint,
|
||||
vk::Format::eR8G8B8A8Unorm,
|
||||
vk::Format::eR8G8B8A8Snorm,
|
||||
vk::Format::eR8G8B8A8Uscaled,
|
||||
vk::Format::eR8G8Snorm,
|
||||
vk::Format::eR8G8Uint,
|
||||
|
@ -335,6 +337,10 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR32Sint,
|
||||
vk::Format::eR32Uint,
|
||||
vk::Format::eBc6HUfloatBlock,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16B16A16Sscaled,
|
||||
vk::Format::eR16G16Sscaled,
|
||||
};
|
||||
return formats;
|
||||
}
|
||||
|
@ -384,10 +390,17 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc5UnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eBc5SnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR16G16B16A16Sint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sscaled) {
|
||||
return vk::Format::eR16G16B16A16Sscaled;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR16G16Sfloat;
|
||||
|
@ -496,6 +509,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR16G16Sint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sscaled) {
|
||||
return vk::Format::eR16G16Sscaled;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Uscaled) {
|
||||
return vk::Format::eR8G8B8A8Uscaled;
|
||||
|
@ -518,6 +535,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::SnormNz) {
|
||||
return vk::Format::eR16G16B16A16Snorm;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eR8G8B8A8Snorm;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc6HUfloatBlock;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||
}
|
||||
|
||||
|
|
|
@ -148,7 +148,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
|||
}
|
||||
}
|
||||
for (const auto& sampler : info.samplers) {
|
||||
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
||||
const auto ssharp = sampler.GetSsharp(info);
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
|
|
|
@ -386,7 +386,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
}
|
||||
}
|
||||
for (const auto& sampler : stage.samplers) {
|
||||
auto ssharp = stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
||||
auto ssharp = sampler.GetSsharp(stage);
|
||||
if (sampler.disable_aniso) {
|
||||
const auto& tsharp = tsharps[sampler.associated_image];
|
||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||
|
|
|
@ -164,10 +164,11 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceVulkan13Features,
|
||||
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
|
||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
|
||||
const vk::StructureChain properties_chain =
|
||||
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
|
||||
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
|
||||
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
||||
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
|
||||
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
|
||||
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
|
||||
|
||||
features = feature_chain.get().features;
|
||||
if (available_extensions.empty()) {
|
||||
|
@ -261,6 +262,7 @@ bool Instance::CreateDevice() {
|
|||
.shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
|
||||
.shaderStorageImageMultisample = features.shaderStorageImageMultisample,
|
||||
.shaderClipDistance = features.shaderClipDistance,
|
||||
.shaderInt64 = features.shaderInt64,
|
||||
.shaderInt16 = features.shaderInt16,
|
||||
},
|
||||
},
|
||||
|
|
|
@ -188,6 +188,11 @@ public:
|
|||
return properties.limits.nonCoherentAtomSize;
|
||||
}
|
||||
|
||||
/// Returns the subgroup size of the selected physical device.
/// This is the cached `subgroup_size` member, which device creation fills from the
/// subgroupSize field of vk::PhysicalDeviceVulkan11Properties.
|
||||
u32 SubgroupSize() const {
|
||||
return subgroup_size;
|
||||
}
|
||||
|
||||
/// Returns the maximum supported elements in a texel buffer
|
||||
u32 MaxTexelBufferElements() const {
|
||||
return properties.limits.maxTexelBufferElements;
|
||||
|
@ -249,6 +254,7 @@ private:
|
|||
bool workgroup_memory_explicit_layout{};
|
||||
bool color_write_en{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool has_nsight_graphics{};
|
||||
|
|
|
@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = 0x00010600U,
|
||||
.subgroup_size = instance.SubgroupSize(),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
};
|
||||
}
|
||||
|
@ -268,7 +269,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
info.pgm_base = pgm->Address<uintptr_t>();
|
||||
info.pgm_hash = hash;
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
programs[i] =
|
||||
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
||||
|
@ -308,7 +310,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
auto program =
|
||||
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
u32 binding{};
|
||||
|
|
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
|||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||
pipeline_cache{instance, scheduler, liverpool},
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
|
||||
if (!Config::nullGpu()) {
|
||||
liverpool->BindRasterizer(this);
|
||||
}
|
||||
|
@ -128,6 +128,7 @@ void Rasterizer::BeginRendering() {
|
|||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
|
||||
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
|
||||
state.color_images[state.num_color_attachments] = image.image;
|
||||
state.color_attachments[state.num_color_attachments++] = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eGeneral,
|
||||
|
@ -152,6 +153,7 @@ void Rasterizer::BeginRendering() {
|
|||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
state.depth_image = image.image;
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = image.layout,
|
||||
|
|
|
@ -50,7 +50,32 @@ void Scheduler::EndRendering() {
|
|||
return;
|
||||
}
|
||||
is_rendering = false;
|
||||
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
|
||||
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
|
||||
barriers.push_back(vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = render_state.color_images[i],
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
}
|
||||
current_cmdbuf.endRendering();
|
||||
if (!barriers.empty()) {
|
||||
current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
vk::PipelineStageFlagBits::eFragmentShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::Flush(SubmitInfo& info) {
|
||||
|
|
|
@ -15,7 +15,9 @@ class Instance;
|
|||
|
||||
struct RenderState {
|
||||
std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
|
||||
std::array<vk::Image, 8> color_images{};
|
||||
vk::RenderingAttachmentInfo depth_attachment{};
|
||||
vk::Image depth_image{};
|
||||
u32 num_color_attachments{};
|
||||
u32 num_depth_attachments{};
|
||||
u32 width = std::numeric_limits<u32>::max();
|
||||
|
|
|
@ -47,6 +47,20 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks whether a packed destination-select value (3 bits per selector, x in the lowest
/// bits) matches the expected pattern for a format with `num_components` components:
/// selector i holds the value 4 + i for every i < num_components and all higher bits are zero.
/// Component counts outside 1..4 never match.
bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
    switch (num_components) {
    case 1:
        return dst_sel == 0b100;
    case 2:
        return dst_sel == 0b101'100;
    case 3:
        return dst_sel == 0b110'101'100;
    case 4:
        return dst_sel == 0b111'110'101'100;
    default:
        return false;
    }
}
|
||||
|
||||
/// Tries to express a destination swizzle through the format itself instead of a component
/// mapping. Only one combination is handled: eR8G8B8A8Unorm with selectors
/// (x, y, z, w) = (6, 5, 4, 7) becomes eB8G8R8A8Unorm. Every other input is returned unchanged,
/// so callers can detect success by comparing the result with the input format.
vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
    // Packed as w:z:y:x, 3 bits each -> w = 7, z = 4, y = 5, x = 6.
    constexpr u32 bgra_select = 0b111'100'101'110;
    const bool is_rgba8_read_as_bgra =
        dst_sel == bgra_select && format == vk::Format::eR8G8B8A8Unorm;
    return is_rgba8_read_as_bgra ? vk::Format::eB8G8R8A8Unorm : format;
}
|
||||
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
|
||||
: is_storage{is_storage} {
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
|
@ -60,9 +74,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce
|
|||
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
|
||||
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||
// Check for unfortunate case of storage images being swizzled
|
||||
if (is_storage && (mapping != vk::ComponentMapping{})) {
|
||||
LOG_ERROR(Render_Vulkan, "Storage image requires swizzling");
|
||||
const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
|
||||
const u32 dst_sel = image.DstSelect();
|
||||
if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
|
||||
mapping = vk::ComponentMapping{};
|
||||
if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
|
||||
format = new_format;
|
||||
return;
|
||||
}
|
||||
LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps,
|
||||
image.DstSelectName());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ struct ImageViewInfo {
|
|||
|
||||
struct Image;
|
||||
|
||||
constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0};
|
||||
|
||||
struct ImageView {
|
||||
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
|
||||
ImageId image_id, std::optional<vk::ImageUsageFlags> usage_override = {});
|
||||
|
|
|
@ -142,14 +142,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
|
|||
image_ids.push_back(image_id);
|
||||
});
|
||||
|
||||
ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
|
||||
// ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
|
||||
|
||||
ImageId image_id{};
|
||||
if (image_ids.empty()) {
|
||||
image_id = slot_images.insert(instance, scheduler, info);
|
||||
RegisterImage(image_id);
|
||||
} else {
|
||||
image_id = image_ids[0];
|
||||
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
|
||||
}
|
||||
|
||||
Image& image = slot_images[image_id];
|
||||
|
@ -183,12 +183,17 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
if (info.guest_address == 0) [[unlikely]] {
|
||||
return slot_image_views[NULL_IMAGE_VIEW_ID];
|
||||
}
|
||||
|
||||
const ImageId image_id = FindImage(info);
|
||||
Image& image = slot_images[image_id];
|
||||
auto& usage = image.info.usage;
|
||||
|
||||
if (view_info.is_storage) {
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
|
||||
usage.storage = true;
|
||||
} else {
|
||||
const auto new_layout = image.info.IsDepthStencil()
|
||||
|
@ -206,7 +211,7 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo&
|
|||
view_info_tmp.range.extent.levels > image.info.resources.levels ||
|
||||
view_info_tmp.range.extent.layers > image.info.resources.layers) {
|
||||
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
LOG_DEBUG(Render_Vulkan,
|
||||
"Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})",
|
||||
view_info_tmp.range.base.level, view_info_tmp.range.extent.levels,
|
||||
view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers,
|
||||
|
@ -341,7 +346,7 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead);
|
||||
}
|
||||
|
||||
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue