diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 0fbfa8b9b..686e8e84f 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -584,7 +584,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + const auto* event = reinterpret_cast(header); + LOG_DEBUG(Render_Vulkan, + "Encountered EventWrite: event_type = {}, event_index = {}", + magic_enum::enum_name(event->event_type.Value()), + magic_enum::enum_name(event->event_index.Value())); + if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) { + // TODO: handle proper synchronization, for now signal that update is done + // immediately + regs.cp_strmout_cntl.offset_update_done = 1; + } break; } case PM4ItOpcode::EventWriteEos: { @@ -732,6 +741,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + LOG_WARNING(Render_Vulkan, + "Unimplemented IT_STRMOUT_BUFFER_UPDATE, update_memory = {}, " + "source_select = {}, buffer_select = {}", + strmout->update_memory.Value(), + magic_enum::enum_name(strmout->source_select.Value()), + strmout->buffer_select.Value()); + break; + } default: UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", static_cast(opcode), count); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c4bebd05f..a62141099 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1175,6 +1175,14 @@ struct Liverpool { BitField<22, 2, u32> onchip; }; + union StreamOutControl { + u32 raw; + struct { + u32 offset_update_done : 1; + u32 : 31; + }; + }; + union StreamOutConfig { u32 raw; struct { @@ -1378,7 +1386,9 @@ struct Liverpool { AaConfig aa_config; INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1); ColorBuffer color_buffers[NumColorBuffers]; - INSERT_PADDING_WORDS(0xC242 - 0xA390); + INSERT_PADDING_WORDS(0xC03F - 0xA390); + StreamOutControl cp_strmout_cntl; + INSERT_PADDING_WORDS(0xC242 - 0xC040); PrimitiveType primitive_type; INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; @@ -1668,6 +1678,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); +static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 6dc7d97a6..58ecda93e 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -246,6 +246,46 @@ struct PM4CmdNop { }; }; +enum class SourceSelect : u32 { + BufferOffset = 0, + VgtStrmoutBufferFilledSize = 1, + SrcAddress = 2, + None = 3, +}; + +struct PM4CmdStrmoutBufferUpdate { + PM4Type3Header header; + union { + BitField<0, 1, u32> update_memory; + BitField<1, 2, SourceSelect> source_select; + BitField<8, 2, u32> buffer_select; + u32 control; + }; + union { + BitField<2, 30, u32> dst_address_lo; + BitField<0, 2, u32> swap_dst; + }; + u32 dst_address_hi; + union { + u32 buffer_offset; + BitField<2, 30, u32> src_address_lo; + BitField<0, 2, u32> swap_src; + }; + u32 src_address_hi; + + template + T DstAddress() const { + ASSERT(update_memory.Value() == 1); + return reinterpret_cast(dst_address_lo.Value() | u64(dst_address_hi & 0xFFFF) << 32); + } + + template + T SrcAddress() const { + ASSERT(source_select.Value() == SourceSelect::SrcAddress); + return reinterpret_cast(src_address_lo.Value() | u64(src_address_hi & 0xFFFF) << 32); + } +}; + struct PM4CmdDrawIndexOffset2 { PM4Type3Header header; u32 max_size; ///< Maximum number of indices @@ -303,6 +343,80 @@ static u64 GetGpuClock64() { return static_cast(ticks); } +// VGT_EVENT_INITIATOR.EVENT_TYPE +enum class EventType : u32 { + SampleStreamoutStats1 = 1, + SampleStreamoutStats2 = 2, + SampleStreamoutStats3 = 3, + CacheFlushTs = 4, + ContextDone = 5, + CacheFlush = 6, + CsPartialFlush = 7, + VgtStreamoutSync = 8, + VgtStreamoutReset = 10, + EndOfPipeIncrDe = 11, + EndOfPipeIbEnd = 12, + RstPixCnt = 13, + VsPartialFlush = 15, + PsPartialFlush = 16, + FlushHsOutput = 17, + FlushLsOutput = 18, + CacheFlushAndInvTsEvent = 20, + ZpassDone = 21, + CacheFlushAndInvEvent = 22, + PerfcounterStart = 23, + PerfcounterStop = 24, + PipelineStatStart = 25, + PipelineStatStop = 26, + PerfcounterSample = 27, + FlushEsOutput = 28, + FlushGsOutput = 29, + SamplePipelineStat = 30, + SoVgtStreamoutFlush = 31, + SampleStreamoutStats = 32, + ResetVtxCnt = 33, + VgtFlush = 36, + ScSendDbVpz = 39, + BottomOfPipeTs = 40, + DbCacheFlushAndInv = 42, + FlushAndInvDbDataTs = 43, + FlushAndInvDbMeta = 44, + FlushAndInvCbDataTs = 45, + FlushAndInvCbMeta = 46, + CsDone = 47, + PsDone = 48, + FlushAndInvCbPixelData = 49, + ThreadTraceStart = 51, + ThreadTraceStop = 52, + ThreadTraceFlush = 54, + ThreadTraceFinish = 55, + PixelPipeStatControl = 56, + PixelPipeStatDump = 57, + PixelPipeStatReset = 58, +}; + +enum class EventIndex : u32 { + Other = 0, + ZpassDone = 1, + SamplePipelineStat = 2, + SampleStreamoutStatSx = 3, + CsVsPsPartialFlush = 4, + EopReserved = 5, + EosReserved = 6, + CacheFlush = 7, +}; + +struct PM4CmdEventWrite { + PM4Type3Header header; + union { + u32 event_control; + BitField<0, 6, EventType> event_type; ///< Event type written to VGT_EVENT_INITIATOR + BitField<8, 4, EventIndex> event_index; ///< Event index + BitField<20, 1, u32> inv_l2; ///< Send WBINVL2 op to the TC L2 cache when EVENT_INDEX = 0111 + }; + u32 address[]; +}; + struct PM4CmdEventWriteEop { PM4Type3Header header; union {