mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-30 23:33:17 +00:00
Initial instancing and asynchronous compute queues (#207)
* gnm_driver: added `sceGnmRegisterOwner` and `sceGnmRegisterResource` * video_out: `sceVideoOutGetDeviceCapabilityInfo` for sdk runtime * gnm_driver: correct vqid index range * amdgpu: indirect buffer, release mem and some additional irq modes * amdgpu: added ASC commands processor * shader_recompiler: added support for fetch instance id * amdgpu: classic bitfields for T# representation (debugging experience) * renderer_vulkan: skip zero sized VBs from binding * texture_cache: image upload logic moved into `Image` object * gnm_driver: `sceGnmDingDong` implementation * texture_cache: `Image` usage flags moved; correct VO buffer pitch
This commit is contained in:
parent
a9cbd8287c
commit
cb6b21de1f
19 changed files with 361 additions and 100 deletions
|
@ -12,7 +12,7 @@ namespace AmdGpu {
|
|||
|
||||
static const char* dcb_task_name{"DCB_TASK"};
|
||||
static const char* ccb_task_name{"CCB_TASK"};
|
||||
static const char* asc_task_name{"ACB_TASK"};
|
||||
static const char* acb_task_name{"ACB_TASK"};
|
||||
|
||||
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
|
||||
|
||||
|
@ -381,6 +381,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
}
|
||||
|
||||
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
||||
TracyFiberEnter(acb_task_name);
|
||||
|
||||
while (!acb.empty()) {
|
||||
const auto* header = reinterpret_cast<const PM4Header*>(acb.data());
|
||||
const u32 type = header->type;
|
||||
|
@ -393,6 +395,69 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
|||
const PM4ItOpcode opcode = header->type3.opcode;
|
||||
const auto* it_body = reinterpret_cast<const u32*>(header) + 1;
|
||||
switch (opcode) {
|
||||
case PM4ItOpcode::Nop: {
|
||||
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndirectBuffer: {
|
||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||
auto task =
|
||||
ProcessCompute({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
|
||||
while (!task.handle.done()) {
|
||||
task.handle.resume();
|
||||
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
};
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::AcquireMem: {
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
rasterizer->DispatchDirect();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WriteData: {
|
||||
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
while (!wait_reg_mem->Test()) {
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::ReleaseMem: {
|
||||
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
|
||||
release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <---
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
|
||||
static_cast<u32>(opcode), count);
|
||||
|
@ -401,7 +466,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
|||
acb = acb.subspan(header->type3.NumWords() + 1);
|
||||
}
|
||||
|
||||
return {}; // Not a coroutine yet
|
||||
TracyFiberLeave;
|
||||
}
|
||||
|
||||
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
|
|
|
@ -265,6 +265,7 @@ enum class InterruptSelect : u32 {
|
|||
None = 0,
|
||||
IrqOnly = 1,
|
||||
IrqWhenWriteConfirm = 2,
|
||||
IrqUndocumented = 3,
|
||||
};
|
||||
|
||||
struct PM4CmdEventWriteEop {
|
||||
|
@ -299,6 +300,9 @@ struct PM4CmdEventWriteEop {
|
|||
|
||||
void SignalFence() const {
|
||||
switch (data_sel.Value()) {
|
||||
case DataSelect::None: {
|
||||
break;
|
||||
}
|
||||
case DataSelect::Data32Low: {
|
||||
*Address<u32>() = DataDWord();
|
||||
break;
|
||||
|
@ -321,6 +325,9 @@ struct PM4CmdEventWriteEop {
|
|||
// No interrupt
|
||||
break;
|
||||
}
|
||||
case InterruptSelect::IrqOnly:
|
||||
ASSERT(data_sel == DataSelect::None);
|
||||
[[fallthrough]];
|
||||
case InterruptSelect::IrqWhenWriteConfirm: {
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop);
|
||||
break;
|
||||
|
@ -559,4 +566,105 @@ struct PM4CmdDrawIndexBase {
|
|||
u32 addr_hi;
|
||||
};
|
||||
|
||||
struct PM4CmdIndirectBuffer {
|
||||
PM4Type3Header header;
|
||||
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
|
||||
union {
|
||||
BitField<0, 16, u32> ibase_hi; ///< Indirect buffer base address
|
||||
u32 dw1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 20, u32> ib_size; ///< Indirect buffer size
|
||||
BitField<20, 1, u32> chain; ///< set to chain to IB allocations
|
||||
BitField<24, 8, u32> vmid; ///< Virtual memory domain ID for command buffer
|
||||
u32 dw2;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>((u64(ibase_hi) << 32u) | ibase_lo);
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdReleaseMem {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR
|
||||
BitField<8, 4, u32> event_index; ///< Event index
|
||||
BitField<12, 1, u32> tcl1_vol_action_ena;
|
||||
BitField<13, 1, u32> tc_vol_action_ena;
|
||||
BitField<15, 1, u32> tc_wb_action_ena;
|
||||
BitField<16, 1, u32> tcl1__action_ena;
|
||||
BitField<17, 1, u32> tc_action_ena;
|
||||
BitField<25, 2, u32> cache_policy; ///< Cache Policy setting used for writing fences and
|
||||
///< timestamps to the TCL2
|
||||
u32 dw1;
|
||||
};
|
||||
union {
|
||||
BitField<16, 2, u32> dst_sel; ///< destination select
|
||||
BitField<24, 3, InterruptSelect> int_sel; ///< selects interrupt action for end-of-pipe
|
||||
BitField<29, 3, DataSelect> data_sel; ///< selects source of data
|
||||
u32 dw2;
|
||||
};
|
||||
u32 address_lo; ///< low bits of address
|
||||
u32 address_hi; ///< high bits of address
|
||||
union {
|
||||
struct {
|
||||
u16 gds_index; ///< Byte offset into GDS to copy from
|
||||
u16 num_dw; ///< Number of DWORDS of GDS to copy
|
||||
};
|
||||
u32 data_lo; ///< value that will be written to memory when event occurs
|
||||
};
|
||||
u32 data_hi;
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>(address_lo | u64(address_hi) << 32);
|
||||
}
|
||||
|
||||
u32 DataDWord() const {
|
||||
return data_lo;
|
||||
}
|
||||
|
||||
u64 DataQWord() const {
|
||||
return data_lo | u64(data_hi) << 32;
|
||||
}
|
||||
|
||||
void SignalFence(Platform::InterruptId irq_id) const {
|
||||
switch (data_sel.Value()) {
|
||||
case DataSelect::Data32Low: {
|
||||
*Address<u32>() = DataDWord();
|
||||
break;
|
||||
}
|
||||
case DataSelect::Data64: {
|
||||
*Address<u64>() = DataQWord();
|
||||
break;
|
||||
}
|
||||
case DataSelect::PerfCounter: {
|
||||
*Address<u64>() = Common::FencedRDTSC();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
switch (int_sel.Value()) {
|
||||
case InterruptSelect::None: {
|
||||
// No interrupt
|
||||
break;
|
||||
}
|
||||
case InterruptSelect::IrqUndocumented:
|
||||
[[fallthrough]];
|
||||
case InterruptSelect::IrqWhenWriteConfirm: {
|
||||
Platform::IrqC::Instance()->Signal(irq_id);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
|
|
@ -46,6 +46,7 @@ enum class PM4ItOpcode : u32 {
|
|||
EventWrite = 0x46,
|
||||
EventWriteEop = 0x47,
|
||||
EventWriteEos = 0x48,
|
||||
ReleaseMem = 0x49,
|
||||
PremableCntl = 0x4A,
|
||||
DmaData = 0x50,
|
||||
ContextRegRmw = 0x51,
|
||||
|
|
|
@ -108,36 +108,39 @@ constexpr std::string_view NameOf(TilingMode type) {
|
|||
}
|
||||
|
||||
struct Image {
|
||||
union {
|
||||
BitField<0, 38, u64> base_address;
|
||||
BitField<40, 12, u64> min_lod;
|
||||
BitField<52, 6, u64> data_format;
|
||||
BitField<58, 4, u64> num_format;
|
||||
BitField<62, 2, u64> mtype;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u64> width;
|
||||
BitField<14, 14, u64> height;
|
||||
BitField<28, 3, u64> perf_modulation;
|
||||
BitField<31, 1, u64> interlaced;
|
||||
BitField<32, 3, u64> dst_sel_x;
|
||||
BitField<35, 3, u64> dst_sel_y;
|
||||
BitField<38, 3, u64> dst_sel_z;
|
||||
BitField<41, 3, u64> dst_sel_w;
|
||||
BitField<44, 4, u64> base_level;
|
||||
BitField<48, 4, u64> last_level;
|
||||
BitField<52, 5, u64> tiling_index;
|
||||
BitField<57, 1, u64> pow2pad;
|
||||
BitField<58, 1, u64> mtype2;
|
||||
BitField<59, 1, u64> atc;
|
||||
BitField<60, 4, ImageType> type;
|
||||
};
|
||||
union {
|
||||
BitField<0, 13, u64> depth;
|
||||
BitField<13, 14, u64> pitch;
|
||||
BitField<32, 13, u64> base_array;
|
||||
BitField<45, 13, u64> last_array;
|
||||
};
|
||||
u64 base_address : 38;
|
||||
u64 mtype_l2 : 2;
|
||||
u64 min_lod : 12;
|
||||
u64 data_format : 6;
|
||||
u64 num_format : 4;
|
||||
u64 mtype : 2;
|
||||
|
||||
u64 width : 14;
|
||||
u64 height : 14;
|
||||
u64 perf_modulation : 3;
|
||||
u64 interlaced : 1;
|
||||
u64 dst_sel_x : 3;
|
||||
u64 dst_sel_y : 3;
|
||||
u64 dst_sel_z : 3;
|
||||
u64 dst_sel_w : 3;
|
||||
u64 base_level : 4;
|
||||
u64 last_level : 4;
|
||||
u64 tiling_index : 5;
|
||||
u64 pow2pad : 1;
|
||||
u64 mtype2 : 1;
|
||||
u64 atc : 1;
|
||||
u64 type : 4;
|
||||
|
||||
u64 depth : 13;
|
||||
u64 pitch : 14;
|
||||
u64 : 5;
|
||||
u64 base_array : 13;
|
||||
u64 last_array : 13;
|
||||
u64 : 6;
|
||||
u64 min_lod_warn : 12;
|
||||
u64 counter_bank_id : 8;
|
||||
u64 lod_hw_cnt_en : 1;
|
||||
u64 : 43;
|
||||
|
||||
VAddr Address() const {
|
||||
return base_address << 8;
|
||||
|
@ -148,8 +151,8 @@ struct Image {
|
|||
}
|
||||
|
||||
u32 NumLayers() const {
|
||||
u32 slices = type == ImageType::Color3D ? 1 : depth.Value() + 1;
|
||||
if (type == ImageType::Cube) {
|
||||
u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1;
|
||||
if (GetType() == ImageType::Cube) {
|
||||
slices *= 6;
|
||||
}
|
||||
if (pow2pad) {
|
||||
|
@ -159,33 +162,38 @@ struct Image {
|
|||
}
|
||||
|
||||
u32 NumLevels() const {
|
||||
if (type == ImageType::Color2DMsaa || type == ImageType::Color2DMsaaArray) {
|
||||
if (GetType() == ImageType::Color2DMsaa || GetType() == ImageType::Color2DMsaaArray) {
|
||||
return 1;
|
||||
}
|
||||
return last_level + 1;
|
||||
}
|
||||
|
||||
ImageType GetType() const noexcept {
|
||||
return static_cast<ImageType>(type);
|
||||
}
|
||||
|
||||
DataFormat GetDataFmt() const noexcept {
|
||||
return static_cast<DataFormat>(data_format.Value());
|
||||
return static_cast<DataFormat>(data_format);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
return static_cast<NumberFormat>(num_format.Value());
|
||||
return static_cast<NumberFormat>(num_format);
|
||||
}
|
||||
|
||||
[[nodiscard]] TilingMode GetTilingMode() const {
|
||||
return static_cast<TilingMode>(tiling_index.Value());
|
||||
TilingMode GetTilingMode() const {
|
||||
return static_cast<TilingMode>(tiling_index);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsTiled() const {
|
||||
bool IsTiled() const {
|
||||
return GetTilingMode() != TilingMode::Display_Linear;
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t GetSizeAligned() const {
|
||||
size_t GetSizeAligned() const {
|
||||
// TODO: Derive this properly from tiling params
|
||||
return (width + 1) * (height + 1) * NumComponents(GetDataFmt());
|
||||
return Pitch() * (height + 1) * NumComponents(GetDataFmt());
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Image) == 32); // 256bits
|
||||
|
||||
// 8.2.7. Image Sampler [RDNA 2 Instruction Set Architecture]
|
||||
enum class ClampMode : u64 {
|
||||
|
|
|
@ -53,7 +53,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
bindings.push_back({
|
||||
.binding = input.binding,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = vk::VertexInputRate::eVertex,
|
||||
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None
|
||||
? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -402,8 +404,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
|||
// Calculate buffers memory overlaps
|
||||
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
||||
for (const auto& input : vs_info.vs_inputs) {
|
||||
const auto& buffer = guest_buffers.emplace_back(
|
||||
vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset));
|
||||
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
ranges.emplace_back(buffer.base_address.Value(),
|
||||
buffer.base_address.Value() + buffer.GetSize());
|
||||
}
|
||||
|
|
|
@ -116,11 +116,13 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
|
|||
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
|
||||
const auto& attrib = group.attrib;
|
||||
is_tiled = attrib.tiling_mode == TilingMode::Tile;
|
||||
tiling_mode =
|
||||
is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear;
|
||||
pixel_format = ConvertPixelFormat(attrib.pixel_format);
|
||||
type = vk::ImageType::e2D;
|
||||
size.width = attrib.width;
|
||||
size.height = attrib.height;
|
||||
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
|
||||
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127);
|
||||
const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
|
||||
ASSERT(is_32bpp);
|
||||
if (!is_tiled) {
|
||||
|
@ -128,11 +130,11 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
|
|||
return;
|
||||
}
|
||||
if (Config::isNeoMode()) {
|
||||
guest_size_bytes = pitch * 128 * ((size.height + 127) & (~127)) * 4;
|
||||
guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4;
|
||||
} else {
|
||||
guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4;
|
||||
guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
|
||||
}
|
||||
is_vo_surface = true;
|
||||
usage.vo_buffer = true;
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
|
@ -140,12 +142,14 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
|||
is_tiled = buffer.IsTiled();
|
||||
tiling_mode = buffer.GetTilingMode();
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
|
||||
num_samples = 1 << buffer.attrib.num_fragments_log2;
|
||||
type = vk::ImageType::e2D;
|
||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||
size.depth = 1;
|
||||
pitch = size.width;
|
||||
guest_size_bytes = buffer.GetSizeAligned();
|
||||
usage.render_target = true;
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
||||
|
@ -153,18 +157,20 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
|||
is_tiled = false;
|
||||
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
|
||||
type = vk::ImageType::e2D;
|
||||
num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2
|
||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||
size.depth = 1;
|
||||
pitch = size.width;
|
||||
guest_size_bytes = buffer.GetSizeAligned();
|
||||
usage.depth_target = true;
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||
is_tiled = image.IsTiled();
|
||||
tiling_mode = image.GetTilingMode();
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
type = ConvertImageType(image.type);
|
||||
type = ConvertImageType(image.GetType());
|
||||
size.width = image.width + 1;
|
||||
size.height = image.height + 1;
|
||||
size.depth = 1;
|
||||
|
@ -222,7 +228,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
}
|
||||
}
|
||||
|
||||
info.usage = ImageUsageFlags(info);
|
||||
usage = ImageUsageFlags(info);
|
||||
|
||||
if (info.pixel_format == vk::Format::eD32Sfloat) {
|
||||
aspect_mask = vk::ImageAspectFlagBits::eDepth;
|
||||
|
@ -243,7 +249,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
.mipLevels = static_cast<u32>(info.resources.levels),
|
||||
.arrayLayers = static_cast<u32>(info.resources.layers),
|
||||
.tiling = vk::ImageTiling::eOptimal,
|
||||
.usage = info.usage,
|
||||
.usage = usage,
|
||||
.initialLayout = vk::ImageLayout::eUndefined,
|
||||
};
|
||||
|
||||
|
@ -296,6 +302,31 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
|
|||
pl_stage = dst_pl_stage;
|
||||
}
|
||||
|
||||
void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
|
||||
// Copy to the image.
|
||||
const vk::BufferImageCopy image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = info.pitch,
|
||||
.bufferImageHeight = info.size.height,
|
||||
.imageSubresource{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {info.size.width, info.size.height, 1},
|
||||
};
|
||||
|
||||
const auto cmdbuf = scheduler->CommandBuffer();
|
||||
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
}
|
||||
|
||||
Image::~Image() = default;
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -42,18 +42,28 @@ struct ImageInfo {
|
|||
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
||||
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
||||
|
||||
bool IsTiled() const {
|
||||
return tiling_mode != AmdGpu::TilingMode::Display_Linear;
|
||||
}
|
||||
bool IsBlockCoded() const;
|
||||
bool IsPacked() const;
|
||||
bool IsDepthStencil() const;
|
||||
|
||||
struct {
|
||||
u32 texture : 1;
|
||||
u32 storage : 1;
|
||||
u32 render_target : 1;
|
||||
u32 depth_target : 1;
|
||||
u32 vo_buffer : 1;
|
||||
} usage; // Usage data tracked during image lifetime
|
||||
|
||||
bool is_tiled = false;
|
||||
bool is_storage = false;
|
||||
bool is_vo_surface = false;
|
||||
vk::Format pixel_format = vk::Format::eUndefined;
|
||||
vk::ImageType type = vk::ImageType::e1D;
|
||||
vk::ImageUsageFlags usage;
|
||||
SubresourceExtent resources;
|
||||
Extent3D size{1, 1, 1};
|
||||
u32 num_samples = 1;
|
||||
u32 pitch = 0;
|
||||
u32 guest_size_bytes = 0;
|
||||
AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
|
||||
|
@ -117,6 +127,7 @@ struct Image {
|
|||
}
|
||||
|
||||
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask);
|
||||
void Upload(vk::Buffer buffer, u64 offset);
|
||||
|
||||
const Vulkan::Instance* instance;
|
||||
Vulkan::Scheduler* scheduler;
|
||||
|
@ -131,6 +142,7 @@ struct Image {
|
|||
std::optional<ImageView> view_for_detiler;
|
||||
|
||||
// Resource state tracking
|
||||
vk::ImageUsageFlags usage;
|
||||
vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
|
||||
vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
|
||||
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
|
||||
|
|
|
@ -48,7 +48,7 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
|
|||
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
|
||||
: is_storage{is_storage} {
|
||||
type = ConvertImageViewType(image.type);
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
range.base.level = 0;
|
||||
range.base.layer = 0;
|
||||
|
|
|
@ -151,7 +151,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
|
|||
// temporary remove its storage bit.
|
||||
std::optional<vk::ImageUsageFlags> usage_override;
|
||||
if (!image.info.is_storage) {
|
||||
usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage;
|
||||
usage_override = image.usage & ~vk::ImageUsageFlagBits::eStorage;
|
||||
}
|
||||
|
||||
const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, usage_override);
|
||||
|
@ -183,7 +183,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
|
|||
vk::AccessFlagBits::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits::eColorAttachmentRead);
|
||||
|
||||
ImageViewInfo view_info{buffer, image.info.is_vo_surface};
|
||||
ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer};
|
||||
return RegisterImageView(image, view_info);
|
||||
}
|
||||
|
||||
|
@ -210,26 +210,8 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
if (!tile_manager.TryDetile(image)) {
|
||||
// Upload data to the staging buffer.
|
||||
const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4);
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
|
||||
// Copy to the image.
|
||||
const vk::BufferImageCopy image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {image.info.size.width, image.info.size.height, 1},
|
||||
};
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
|
||||
vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
image.Upload(staging.Handle(), offset);
|
||||
}
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <magic_enum.hpp>
|
||||
#include <vulkan/vulkan_to_string.hpp>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue