video_core: Bringup some basic functionality (#145)

* video_core: Remove hack in rasterizer

* The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers

* shader_recompiler: Implement attribute loads/stores

* video_core: Add basic vertex, index buffer handling and pipeline caching

* externals: Make xxhash lowercase
This commit is contained in:
TheTurtle 2024-05-25 15:33:15 +03:00 committed by GitHub
parent e9f64bb76c
commit 3c90b8ac00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1030 additions and 383 deletions

View file

@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
rasterizer->DrawIndex();
rasterizer->Draw(true);
}
break;
}
@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex();
if (rasterizer) {
rasterizer->Draw(false);
}
break;
}
case PM4ItOpcode::DispatchDirect: {

View file

@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include <array>
#include <condition_variable>
@ -32,13 +33,13 @@ struct Liverpool {
static constexpr u32 NumColorBuffers = 8;
static constexpr u32 NumViewports = 16;
static constexpr u32 NumClipPlanes = 6;
static constexpr u32 NumWordsShaderUserData = 16;
static constexpr u32 NumShaderUserData = 16;
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ShRegWordOffset = 0x2C00;
static constexpr u32 NumRegs = 0xD000;
using UserData = std::array<u32, NumWordsShaderUserData>;
using UserData = std::array<u32, NumShaderUserData>;
struct ShaderProgram {
u32 address_lo;
@ -57,6 +58,14 @@ struct Liverpool {
}
};
union PsInputControl {
u32 raw;
BitField<0, 5, u32> input_offset;
BitField<5, 1, u32> use_default;
BitField<8, 2, u32> default_value;
BitField<10, 1, u32> flat_shade;
};
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
@ -171,25 +180,6 @@ struct Liverpool {
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
};
union DepthSize {
u32 raw;
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
u32 Pitch() const {
return (pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (height_tile_max + 1) << 3;
}
};
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
@ -227,9 +217,45 @@ struct Liverpool {
BitField<24, 8, u32> stencil_op_val;
};
union StencilInfo {
u32 raw;
BitField<0, 1, u32> format;
struct DepthBuffer {
enum class ZFormat : u32 {
Invald = 0,
Z16 = 1,
Z32Float = 2,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
};
enum class ClipSpace : u32 {
@ -423,39 +449,6 @@ struct Liverpool {
Swap8In64 = 3,
};
enum class Format : u32 {
Invalid = 0,
Color_8 = 1,
Color_16 = 2,
Color_8_8 = 3,
Color_32 = 4,
Color_16_16 = 5,
Color_10_11_11 = 6,
Color_11_11_10 = 7,
Color_10_10_10_2 = 8,
Color_2_10_10_10 = 9,
Color_8_8_8_8 = 10,
Color_32_32 = 11,
Color_16_16_16_16 = 12,
Color_32_32_32_32 = 14,
Color_5_6_5 = 16,
Color_1_5_5_5 = 17,
Color_5_5_5_1 = 18,
Color_4_4_4_4 = 19,
Color_8_24 = 20,
Color_24_8 = 21,
Color_X24_8_32_FL = 22,
};
enum class NumberType : u32 {
Unorm = 0,
Snorm = 1,
Uint = 4,
Sint = 5,
Srgb = 6,
Float = 7,
};
enum class SwapMode : u32 {
Standard = 0,
Alternate = 1,
@ -482,9 +475,9 @@ struct Liverpool {
} view;
union {
BitField<0, 2, EndianSwap> endian;
BitField<2, 5, Format> format;
BitField<2, 5, DataFormat> format;
BitField<7, 1, u32> linear_general;
BitField<8, 2, NumberType> number_type;
BitField<8, 2, NumberFormat> number_type;
BitField<11, 2, SwapMode> comp_swap;
BitField<13, 1, u32> fast_clear;
BitField<14, 1, u32> compression;
@ -529,6 +522,12 @@ struct Liverpool {
u64 CmaskAddress() const {
return u64(cmask_base_address) << 8;
}
NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types, account for it.
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: info.number_type;
}
};
enum class PrimitiveType : u32 {
@ -563,14 +562,8 @@ struct Liverpool {
u32 stencil_clear;
u32 depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2);
StencilInfo stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
DepthBuffer depth_buffer;
INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
@ -584,9 +577,12 @@ struct Liverpool {
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NumViewports> viewports;
std::array<ClipUserData, NumClipPlanes> clip_user_data;
INSERT_PADDING_WORDS(0xA1B1 - 0xA187);
INSERT_PADDING_WORDS(0xA191 - 0xA187);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1);
INSERT_PADDING_WORDS(4);
BitField<0, 6, u32> num_interp;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
@ -616,6 +612,17 @@ struct Liverpool {
VgtNumInstances num_instances;
};
std::array<u32, NumRegs> reg_array{};
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &vs_program;
case 4:
return &ps_program;
default:
return nullptr;
}
}
};
Regs regs{};
@ -656,14 +663,16 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);

View file

@ -2,11 +2,45 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "common/assert.h"
#include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu {
u32 getNumComponents(DataFormat format) {
std::string_view NameOf(NumberFormat fmt) {
switch (fmt) {
case NumberFormat::Unorm:
return "Unorm";
case NumberFormat::Snorm:
return "Snorm";
case NumberFormat::Uscaled:
return "Uscaled";
case NumberFormat::Sscaled:
return "Sscaled";
case NumberFormat::Uint:
return "Uint";
case NumberFormat::Sint:
return "Sint";
case NumberFormat::SnormNz:
return "SnormNz";
case NumberFormat::Float:
return "Float";
case NumberFormat::Srgb:
return "Srgb";
case NumberFormat::Ubnorm:
return "Ubnorm";
case NumberFormat::UbnromNz:
return "UbnormNz";
case NumberFormat::Ubint:
return "Ubint";
case NumberFormat::Ubscaled:
return "Unscaled";
default:
UNREACHABLE();
}
}
u32 NumComponents(DataFormat format) {
constexpr std::array numComponentsPerElement = {
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,

View file

@ -3,6 +3,8 @@
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
@ -59,6 +61,18 @@ enum class NumberFormat : u32 {
Ubscaled = 13,
};
u32 getNumComponents(DataFormat format);
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
u32 NumComponents(DataFormat format);
} // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::NumberFormat> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
}
};

View file

@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
return vk::PrimitiveTopology::eTriangleListWithAdjacency;
case Liverpool::PrimitiveType::AdjTriangleStrip:
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
case Liverpool::PrimitiveType::QuadList:
// Needs to generate index buffer on the fly.
return vk::PrimitiveTopology::eTriangleList;
default:
UNREACHABLE();
return vk::PrimitiveTopology::eTriangleList;
@ -110,4 +113,42 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
}
}
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb;
}
UNREACHABLE();
}
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format) {
UNREACHABLE();
}
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
static constexpr u16 NumVerticesPerQuad = 4;
u16* out_data = reinterpret_cast<u16*>(out_ptr);
for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
*out_data++ = i;
*out_data++ = i + 1;
*out_data++ = i + 2;
*out_data++ = i + 2;
*out_data++ = i;
*out_data++ = i + 3;
}
}
} // namespace Vulkan::LiverpoolToVK

View file

@ -4,6 +4,7 @@
#pragma once
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan::LiverpoolToVK {
@ -20,4 +21,11 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK

View file

@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
if (!frame) {
if (!splash_img.has_value()) {
VideoCore::ImageInfo info{};
info.pixel_format = vk::Format::eR8G8B8A8Srgb;
info.type = vk::ImageType::e2D;
@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
}
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
// Request a free presentation frame.
Frame* frame = GetRenderFrame();

View file

@ -4,22 +4,58 @@
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_,
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
const PipelineKey& key_, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} {
: instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
continue;
}
stages[i] = *infos[i];
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.inputRate = vk::VertexInputRate::eVertex,
});
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 0U,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0U,
.pVertexAttributeDescriptions = nullptr,
.vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = attributes.data(),
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main",
};
const vk::Format color_format = vk::Format::eB8G8R8A8Srgb;
const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1,
.pColorAttachmentFormats = &color_format,
.depthAttachmentFormat = vk::Format::eUndefined,
.colorAttachmentCount = num_color_formats,
.pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined,
};
@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = pipeline_layout,
.layout = *pipeline_layout,
};
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
}
} // namespace Vulkan

View file

@ -1,19 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace Vulkan {
static constexpr u32 MaxVertexBufferCount = 32;
static constexpr u32 MaxShaderStages = 5;
class Instance;
class Scheduler;
using Liverpool = AmdGpu::Liverpool;
struct PipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
Liverpool::DepthControl depth;
Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front;
@ -21,26 +33,41 @@ struct PipelineKey {
Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
}
};
static_assert(std::has_unique_object_representations_v<PipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key,
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout,
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
const PipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline;
}
private:
const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache;
vk::UniquePipelineLayout pipeline_layout;
std::array<Shader::Info, MaxShaderStages> stages;
PipelineKey key;
};
} // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};

View file

@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions);
LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
}
void Instance::CollectToolingInfo() {

View file

@ -1,9 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h"
#include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -11,60 +13,123 @@
namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
info.user_data = user_data;
info.stage = stage;
switch (stage) {
case Shader::Stage::Fragment: {
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
.is_default = bool(regs.ps_inputs[i].use_default),
.is_flat = bool(regs.ps_inputs[i].flat_shade),
.default_value = regs.ps_inputs[i].default_value,
});
}
break;
}
default:
break;
}
return info;
}
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096},
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
}
void PipelineCache::BindPipeline() {
SCOPE_EXIT {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
};
const GraphicsPipeline* PipelineCache::GetPipeline() {
RefreshKey();
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) {
it.value() = CreatePipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
if (pipeline) {
return;
void PipelineCache::RefreshKey() {
auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
const auto& cb = regs.color_buffers[i];
key.color_formats[i] = cb.base_address
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
: vk::Format::eUndefined;
}
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) {
const u32* token = pgm.Address<u32>();
for (u32 i = 0; i < MaxShaderStages; i++) {
auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
key.stage_hashes[i] = 0;
continue;
}
const u32* code = pgm->Address<u32>();
// Retrieve shader header.
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo));
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
key.stage_hashes[i] = bininfo.shader_hash;
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
const auto& regs = liverpool->regs;
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!graphics_key.stage_hashes[i]) {
stages[i] = VK_NULL_HANDLE;
continue;
}
auto* pgm = regs.ProgramForStage(i);
const u32* code = pgm->Address<u32>();
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
// Lookup if the shader already exists.
const auto it = module_map.find(bininfo.shader_hash);
if (it != module_map.end()) {
return *it->second;
stages[i] = *it->second;
continue;
}
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data);
return CompileSPV(program, instance.GetDevice());
};
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
// Retrieve shader stage modules.
// TODO: Only do this when program address is changed.
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
// Recompile shader to IR.
const auto stage = Shader::Stage{i};
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
std::move(info));
// Bind pipeline.
// TODO: Read entire key based on reg state.
graphics_key.prim_type = liverpool->regs.primitive_type;
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
*pipeline_layout, stages);
// Compile IR to SPIR-V
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info;
}
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
infos, stages);
}
} // namespace Vulkan

View file

@ -8,6 +8,10 @@
#include "shader_recompiler/object_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
struct Info;
}
namespace Vulkan {
class Instance;
@ -21,7 +25,12 @@ public:
AmdGpu::Liverpool* liverpool);
~PipelineCache() = default;
void BindPipeline();
const GraphicsPipeline* GetPipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
private:
const Instance& instance;
@ -31,7 +40,7 @@ private:
vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{};
std::unique_ptr<GraphicsPipeline> pipeline;
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
PipelineKey graphics_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool;

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -18,33 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
memory->SetInstance(&instance);
}
Rasterizer::~Rasterizer() = default;
void Rasterizer::DrawIndex() {
void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs;
static bool first_time = true;
if (first_time) {
first_time = false;
return;
}
UpdateDynamicState();
pipeline_cache.BindPipeline();
const u32 pitch = regs.color_buffers[0].Pitch();
const u32 height = regs.color_buffers[0].Height();
const u32 tile_max = regs.color_buffers[0].slice.tile_max;
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch);
const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
pipeline->BindResources(memory);
const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view,
@ -61,13 +54,50 @@ void Rasterizer::DrawIndex() {
.pColorAttachments = &color_info,
};
UpdateDynamicState();
cmdbuf.beginRendering(rendering_info);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32);
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0));
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering();
}
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return 0;
}
const VAddr index_address = regs.index_base_address.Address();
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
const vk::IndexType index_type =
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
: vk::IndexType::eUint32;
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
return regs.num_indices;
}
void Rasterizer::UpdateDynamicState() {
UpdateViewportScissorState();
}

View file

@ -3,7 +3,6 @@
#pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@ -11,6 +10,10 @@ namespace AmdGpu {
struct Liverpool;
}
namespace Core {
class MemoryManager;
}
namespace VideoCore {
class TextureCache;
}
@ -26,17 +29,14 @@ public:
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
~Rasterizer();
/// Performs a draw call with an index buffer.
void DrawIndex();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();
void Draw(bool is_indexed);
private:
/// Updates viewport and scissor from liverpool registers.
void UpdateViewportScissorState();
u32 SetupIndexBuffer(bool& is_indexed);
void MapMemory(VAddr addr, size_t size);
/// Updates depth and stencil pipeline state from liverpool registers.
void UpdateDynamicState();
void UpdateViewportScissorState();
void UpdateDepthStencilState();
private:
@ -44,6 +44,7 @@ private:
Scheduler& scheduler;
VideoCore::TextureCache& texture_cache;
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer;
};

View file

@ -35,7 +35,7 @@ public:
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);

View file

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h"
@ -65,6 +66,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
}
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D;
size.width = buffer.Pitch();
size.height = buffer.Height();
pitch = size.width;
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}

View file

@ -6,6 +6,7 @@
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo {
ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
bool is_tiled = false;
vk::Format pixel_format = vk::Format::eUndefined;

View file

@ -101,8 +101,8 @@ TextureCache::~TextureCache() {
}
void TextureCache::OnCpuWrite(VAddr address) {
const VAddr address_aligned = address & ~((1 << PageBits) - 1);
ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) {
const VAddr address_aligned = address & ~((1 << PageShift) - 1);
ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
@ -137,26 +137,20 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
return image;
}
ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) {
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) {
if (image.cpu_addr == cpu_address) {
image_ids.push_back(image_id);
}
});
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
const ImageInfo info{buffer};
auto& image = FindImage(info, buffer.Address());
ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!");
auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()];
ImageViewInfo info;
info.format = vk::Format::eB8G8R8A8Srgb;
if (const ImageViewId view_id = image->FindView(info); view_id) {
ImageViewInfo view_info;
view_info.format = info.pixel_format;
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
return slot_image_views[view_id];
}
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image);
image->image_view_infos.emplace_back(info);
image->image_view_ids.emplace_back(view_id);
const ImageViewId view_id =
slot_image_views.insert(instance, scheduler, view_info, image.image);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
}
@ -225,13 +219,13 @@ void TextureCache::UnregisterImage(ImageId image_id) {
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
return;
}
auto& image_ids = page_it.value();
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits);
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
return;
}
image_ids.erase(vector_it);

View file

@ -37,7 +37,7 @@ public:
Image& FindImage(const ImageInfo& info, VAddr cpu_address);
/// Retrieves the render target with specified properties
ImageView& RenderTarget(VAddr cpu_address, u32 pitch);
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
/// Reuploads image contents.
void RefreshImage(Image& image);