video_core: added support for indirect dispatches (gfx only)

This commit is contained in:
psucien 2024-08-28 23:25:58 +02:00
parent 3fbb68048e
commit 9d349a1308
6 changed files with 101 additions and 1 deletions

View file

@ -383,6 +383,22 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
break;
}
case PM4ItOpcode::DispatchIndirect: {
    // Indirect compute dispatch on the graphics queue. The packet supplies an offset that
    // is handed to the rasterizer together with the queue's stored indirect-arguments base
    // address and the size of one GroupDimensions record.
    const auto* packet = reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
    const auto args_offset = packet->data_offset;
    const auto args_base = mapped_queues[GfxQueueId].indirect_args_addr;
    const auto args_size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);

    // Nothing is submitted when there is no rasterizer or when bit 0 of the compute
    // program's dispatch initiator register is clear.
    if (!rasterizer || !(regs.cs_program.dispatch_initiator & 1)) {
        break;
    }

    // The packet's own address labels the debug scope and the breadcrumb.
    const void* const packet_addr = reinterpret_cast<const void*>(header);
    rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DispatchIndirect", packet_addr));
    rasterizer->Breadcrumb(u64(packet_addr));
    rasterizer->DispatchIndirect(args_base, args_offset, args_size);
    rasterizer->ScopeMarkerEnd();
    break;
}
case PM4ItOpcode::NumInstances: {
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
regs.num_instances.num_instances = num_instances->num_instances;
@ -399,6 +415,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::SetBase: {
    // Records the base address carried by the packet as the graphics queue's
    // indirect-arguments address. Only the DrawIndexIndirPatchTable base index is
    // accepted here; anything else trips the assertion.
    const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
    ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
    auto& gfx_queue = mapped_queues[GfxQueueId];
    const auto args_base = set_base->Address<u64>();
    gfx_queue.indirect_args_addr = args_base;
    break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;

View file

@ -1127,6 +1127,7 @@ private:
std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{};
ComputeProgram cs_state{};
VAddr indirect_args_addr{};
};
std::array<GpuQueue, NumTotalQueues> mapped_queues{};

View file

@ -704,4 +704,40 @@ struct PM4CmdReleaseMem {
}
};
struct PM4CmdSetBase {
enum class BaseIndex : u32 {
DisplayListPatchTable = 0b0000,
DrawIndexIndirPatchTable = 0b0001,
GdsPartition = 0b0010,
CePartition = 0b0011,
};
PM4Type3Header header;
union {
BitField<0, 4, BaseIndex> base_index;
u32 dw1;
};
u32 address0;
u32 address1;
template <typename T>
T Address() const {
ASSERT(base_index == BaseIndex::DisplayListPatchTable ||
base_index == BaseIndex::DrawIndexIndirPatchTable);
return reinterpret_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
}
};
/// PM4 packet layout for an indirect compute dispatch: a type-3 header followed by a data
/// offset and a dispatch initiator value. The packet has no fields for the group
/// dimensions themselves.
struct PM4CmdDispatchIndirect {
/// Three consecutive u32 values.
/// NOTE(review): presumably the X/Y/Z thread-group counts that live at `data_offset`
/// within the indirect-arguments buffer -- confirm against the PM4 packet specification.
struct GroupDimensions {
u32 dim_x;
u32 dim_y;
u32 dim_z;
};
PM4Type3Header header;
u32 data_offset; ///< Byte aligned offset where the required data structure starts
u32 dispatch_initiator; ///< Dispatch Initiator Register
};
} // namespace AmdGpu