diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 9aede3304..70cf09a97 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -703,7 +703,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da cmdbuf[3] = (count_addr != 0 ? 1u : 0u) << 0x1e; cmdbuf[4] = max_count; *(u64*)(&cmdbuf[5]) = count_addr; - cmdbuf[7] = AmdGpu::Liverpool::DrawIndexedIndirectArgsSize; + cmdbuf[7] = sizeof(DrawIndexedIndirectArgs); cmdbuf[8] = 0; cmdbuf += 9; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 12b5de436..f7b710edd 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -410,7 +410,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; - const auto size = sizeof(PM4CmdDrawIndirect::DrawInstancedArgs); + const auto size = sizeof(DrawIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -427,7 +427,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_index_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; - const auto size = sizeof(PM4CmdDrawIndexIndirect::DrawIndexInstancedArgs); + const auto size = sizeof(DrawIndexedIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -442,10 +442,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + reinterpret_cast(header); const auto offset = draw_index_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; - const auto size = sizeof(PM4CmdDrawIndexIndirect::DrawIndexInstancedArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -453,7 +452,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address)); - rasterizer->DrawIndirect(true, ib_address, offset, size, + rasterizer->DrawIndirect(true, ib_address, offset, draw_index_indirect->stride, draw_index_indirect->count, draw_index_indirect->countAddr); rasterizer->ScopeMarkerEnd(); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 0595a242c..0ef9397b0 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -57,8 +57,6 @@ struct Liverpool { static constexpr u32 ConfigRegWordOffset = 0x2000; static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 NumRegs = 0xD000; - static constexpr u32 DrawIndirectArgsSize = 0x10u; - static constexpr u32 DrawIndexedIndirectArgsSize = 0x14u; using UserData = std::array; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index d6cab23d2..be6751285 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -778,14 +778,15 @@ struct PM4CmdDispatchIndirect { u32 dispatch_initiator; ///< Dispatch Initiator Register }; -struct PM4CmdDrawIndirect { - struct DrawInstancedArgs { - u32 vertex_count_per_instance; - u32 instance_count; - u32 start_vertex_location; - u32 start_instance_location; - }; +struct DrawIndirectArgs { + u32 vertex_count_per_instance; + u32 instance_count; + u32 start_vertex_location; + u32 start_instance_location; +}; +static_assert(sizeof(DrawIndirectArgs) == 0x10u); +struct PM4CmdDrawIndirect { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -801,15 +802,16 @@ struct PM4CmdDrawIndirect { u32 draw_initiator; ///< Draw Initiator Register }; -struct PM4CmdDrawIndexIndirect { - struct DrawIndexInstancedArgs { - u32 index_count_per_instance; - u32 instance_count; - u32 start_index_location; - u32 base_vertex_location; - u32 start_instance_location; - }; +struct DrawIndexedIndirectArgs { + u32 index_count_per_instance; + u32 instance_count; + u32 start_index_location; + u32 base_vertex_location; + u32 start_instance_location; +}; +static_assert(sizeof(DrawIndexedIndirectArgs) == 0x14u); +struct PM4CmdDrawIndexIndirect { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -822,16 +824,29 @@ struct PM4CmdDrawIndexIndirect { BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the ///< StartInstanceLocation it fetched from memory }; + u32 draw_initiator; ///< Draw Initiator Register +}; +struct PM4CmdDrawIndexIndirectMulti { + PM4Type3Header header; ///< header + u32 data_offset; ///< Byte aligned offset where the required data structure starts + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the + ///< BaseVertexLocation it fetched from memory + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the + ///< StartInstanceLocation it fetched from memory + }; union { u32 dw4; - struct { - BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count - BitField<30, 1, u32> - countIndirectEnable; ///< Indicates the data structure count is in memory - BitField<31, 1, u32> - drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC - }; + BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count + BitField<30, 1, u32> + countIndirectEnable; ///< Indicates the data structure count is in memory + BitField<31, 1, u32> + drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC }; u32 count; ///< Count of data structures to loop through before going to next packet u64 countAddr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a8b4728c0..3c4012a26 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -115,7 +115,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { } } -void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, +void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 stride, u32 max_count, VAddr count_address) { RENDERER_TRACE; @@ -142,7 +142,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 buffer_cache.BindVertexBuffers(vs_info); buffer_cache.BindIndexBuffer(is_indexed, 0); - const auto [buffer, base] = buffer_cache.ObtainBuffer(arg_address + offset, size, false); + const auto [buffer, base] = + buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count, false); VideoCore::Buffer* count_buffer{}; u32 count_base{}; @@ -158,26 +159,22 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 const auto cmdbuf = scheduler.CommandBuffer(); if (is_indexed) { - static_assert(sizeof(VkDrawIndexedIndirectCommand) == - AmdGpu::Liverpool::DrawIndexedIndirectArgsSize); + ASSERT(sizeof(VkDrawIndexedIndirectCommand) == stride); if (count_address != 0) { cmdbuf.drawIndexedIndirectCount(buffer->Handle(), base, count_buffer->Handle(), - count_base, max_count, - AmdGpu::Liverpool::DrawIndexedIndirectArgsSize); + count_base, max_count, stride); } else { - cmdbuf.drawIndexedIndirect(buffer->Handle(), base, max_count, - AmdGpu::Liverpool::DrawIndexedIndirectArgsSize); + cmdbuf.drawIndexedIndirect(buffer->Handle(), base, max_count, stride); } } else { - static_assert(sizeof(VkDrawIndirectCommand) == AmdGpu::Liverpool::DrawIndirectArgsSize); + ASSERT(sizeof(VkDrawIndirectCommand) == stride); if (count_address != 0) { cmdbuf.drawIndirectCount(buffer->Handle(), base, count_buffer->Handle(), count_base, - max_count, AmdGpu::Liverpool::DrawIndirectArgsSize); + max_count, stride); } else { - cmdbuf.drawIndirect(buffer->Handle(), base, max_count, - AmdGpu::Liverpool::DrawIndirectArgsSize); + cmdbuf.drawIndirect(buffer->Handle(), base, max_count, stride); } } }