mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-04 08:06:20 +00:00
Initial instancing and asynchronous compute queues (#207)
* gnm_driver: added `sceGnmRegisterOwner` and `sceGnmRegisterResource` * video_out: `sceVideoOutGetDeviceCapabilityInfo` for sdk runtime * gnm_driver: correct vqid index range * amdgpu: indirect buffer, release mem and some additional irq modes * amdgpu: added ASC commands processor * shader_recompiler: added support for fetch instance id * amdgpu: classic bitfields for T# representation (debugging experience) * renderer_vulkan: skip zero sized VBs from binding * texture_cache: image upload logic moved into `Image` object * gnm_driver: `sceGnmDingDong` implementation * texture_cache: `Image` usage flags moved; correct VO buffer pitch
This commit is contained in:
parent
a9cbd8287c
commit
cb6b21de1f
19 changed files with 361 additions and 100 deletions
|
@ -72,6 +72,9 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
|
|||
attrib.sgpr_base = it->base_sgpr;
|
||||
attrib.dword_offset = it->dword_offset;
|
||||
|
||||
// Store instance id rate
|
||||
attrib.instance_data = inst.src[0].code;
|
||||
|
||||
// Mark load as used.
|
||||
it->dst_reg = -1;
|
||||
}
|
||||
|
|
|
@ -9,11 +9,12 @@
|
|||
namespace Shader::Gcn {
|
||||
|
||||
struct VertexAttribute {
|
||||
u8 semantic; ///< Semantic index of the attribute
|
||||
u8 dest_vgpr; ///< Destination VGPR to load first component.
|
||||
u8 num_elements; ///< Number of components to load
|
||||
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
|
||||
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
|
||||
u8 semantic; ///< Semantic index of the attribute
|
||||
u8 dest_vgpr; ///< Destination VGPR to load first component.
|
||||
u8 num_elements; ///< Number of components to load
|
||||
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
|
||||
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
|
||||
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
|
||||
};
|
||||
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
|
||||
|
|
|
@ -194,6 +194,11 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
|
||||
}
|
||||
|
||||
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
|
||||
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
|
||||
attrib.instance_data);
|
||||
}
|
||||
|
||||
// Read the V# of the attribute to figure out component number and type.
|
||||
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
|
||||
|
@ -203,6 +208,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -270,8 +270,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
.dword_offset = tsharp.dword_offset,
|
||||
.type = image.type,
|
||||
.nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()),
|
||||
.type = image.GetType(),
|
||||
.nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
|
||||
.is_storage = IsImageStorageInstruction(inst),
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
});
|
||||
|
@ -293,7 +293,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
// Now that we know the image type, adjust texture coordinate vector.
|
||||
const IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||
switch (image.type) {
|
||||
switch (image.GetType()) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
return {body->Arg(0), body->Arg(1)};
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
|
@ -305,7 +305,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
case AmdGpu::ImageType::Cube:
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.type.Value());
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
}
|
||||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
|
|
@ -72,11 +72,19 @@ using SamplerResourceList = boost::container::static_vector<SamplerResource, 8>;
|
|||
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
enum InstanceIdType : u8 {
|
||||
None = 0,
|
||||
OverStepRate0 = 1,
|
||||
OverStepRate1 = 2,
|
||||
Plain = 3,
|
||||
};
|
||||
|
||||
AmdGpu::NumberFormat fmt;
|
||||
u16 binding;
|
||||
u16 num_components;
|
||||
u8 sgpr_base;
|
||||
u8 dword_offset;
|
||||
InstanceIdType instance_step_rate;
|
||||
};
|
||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue