diff --git a/CMakeLists.txt b/CMakeLists.txt index adff454b8..3db92c476 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -657,6 +657,7 @@ set(COMMON src/common/logging/backend.cpp src/common/arch.h src/common/assert.cpp src/common/assert.h + src/common/binary_helper.h src/common/bit_array.h src/common/bit_field.h src/common/bounded_threadsafe_queue.h @@ -926,6 +927,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/region_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h + src/video_core/renderer_vulkan/shader_cache.cpp + src/video_core/renderer_vulkan/shader_cache.h src/video_core/renderer_vulkan/vk_common.cpp src/video_core/renderer_vulkan/vk_common.h src/video_core/renderer_vulkan/vk_compute_pipeline.cpp diff --git a/src/common/binary_helper.h b/src/common/binary_helper.h new file mode 100644 index 000000000..dcf5c2dc4 --- /dev/null +++ b/src/common/binary_helper.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include "common/logging/log.h" + +using u32 = uint32_t; + +template +void writeBin(std::ostream& os, const T& v) { + LOG_INFO(Render_Recompiler, "BinaryHelper: Pos: {}", static_cast(os.tellp())); + os.write(reinterpret_cast(&v), sizeof(T)); +} + +template +void readBin(std::istream& is, T& v) { + if (is.eof()) { + LOG_WARNING(Render_Recompiler, "BinaryHelper: EOF"); + } + is.read(reinterpret_cast(&v), sizeof(T)); +} \ No newline at end of file diff --git a/src/common/hash.h b/src/common/hash.h index d5cacedd7..b29cfb90f 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -5,10 +5,7 @@ #include "common/types.h" -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 12) + (seed >> 4)); -} - -[[nodiscard]] inline u32 HashCombine(const u32 seed, const u32 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +template +T HashCombine(const T& seed, const U& value) { + return seed ^ (static_cast(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2)); } \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 480ceee0b..e2ee3e103 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -256,6 +256,11 @@ void Emulator::Run(std::filesystem::path file, const std::vector ar } VideoCore::SetOutputDir(mount_captures_dir, id); + const auto shader_cache_dir = Common::FS::GetUserPath(Common::FS::PathType::ShaderDir) / "cache"; + if (!std::filesystem::exists(shader_cache_dir)) { + std::filesystem::create_directories(shader_cache_dir); + LOG_INFO(Loader, "Created shader cache directory: {}", shader_cache_dir.string()); + } // Initialize kernel and library facilities. Libraries::InitHLELibs(&linker->GetHLESymbols()); diff --git a/src/video_core/renderer_vulkan/shader_cache.cpp b/src/video_core/renderer_vulkan/shader_cache.cpp new file mode 100644 index 000000000..7533674d1 --- /dev/null +++ b/src/video_core/renderer_vulkan/shader_cache.cpp @@ -0,0 +1,798 @@ +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#include +#endif +#include "common/hash.h" +#include "common/path_util.h" +#include "common/io_file.h" +#include "common/binary_helper.h" +#include "common/logging/log.h" +#include "shader_recompiler/ir/type.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/specialization.h" + +using u64 = uint64_t; +using u32 = uint32_t; + +namespace ShaderCache { + +const auto shader_cache_dir = Common::FS::GetUserPath(Common::FS::PathType::ShaderDir) / "cache"; +std::unordered_map> g_ud_storage; + +u64 CalculateSpecializationHash(const Shader::StageSpecialization& spec) { + u64 hash = 0; + + const auto& runtime_info = spec.runtime_info; + hash = HashCombine(hash, static_cast(runtime_info.stage)); + hash = HashCombine(hash, runtime_info.num_user_data); + hash = HashCombine(hash, runtime_info.num_input_vgprs); + hash = HashCombine(hash, runtime_info.num_allocated_vgprs); + hash = HashCombine(hash, static_cast(runtime_info.fp_denorm_mode32)); + hash = HashCombine(hash, static_cast(runtime_info.fp_round_mode32)); + + switch (runtime_info.stage) { + case Shader::Stage::Local: + hash = HashCombine(hash, runtime_info.ls_info.ls_stride); + hash = HashCombine(hash, runtime_info.ls_info.links_with_tcs); + break; + case Shader::Stage::Export: + hash = HashCombine(hash, runtime_info.es_info.vertex_data_size); + break; + case Shader::Stage::Vertex: + hash = HashCombine(hash, runtime_info.vs_info.num_outputs); + for (size_t i = 0; + i < runtime_info.vs_info.num_outputs && i < runtime_info.vs_info.outputs.size(); ++i) { + const auto& output_map = runtime_info.vs_info.outputs[i]; + for (const auto& output : output_map) { + hash = HashCombine(hash, static_cast(output)); + } + } + hash = HashCombine(hash, runtime_info.vs_info.emulate_depth_negative_one_to_one); + hash = HashCombine(hash, runtime_info.vs_info.clip_disable); + hash = HashCombine(hash, static_cast(runtime_info.vs_info.tess_type)); + hash = HashCombine(hash, static_cast(runtime_info.vs_info.tess_topology)); + hash = HashCombine(hash, static_cast(runtime_info.vs_info.tess_partitioning)); + hash = HashCombine(hash, runtime_info.vs_info.hs_output_cp_stride); + break; + case Shader::Stage::Hull: + hash = HashCombine(hash, runtime_info.hs_info.num_input_control_points); + hash = HashCombine(hash, runtime_info.hs_info.num_threads); + hash = HashCombine(hash, static_cast(runtime_info.hs_info.tess_type)); + hash = HashCombine(hash, runtime_info.hs_info.ls_stride); + hash = HashCombine(hash, runtime_info.hs_info.hs_output_cp_stride); + hash = HashCombine(hash, runtime_info.hs_info.hs_output_base); + break; + case Shader::Stage::Geometry: + hash = HashCombine(hash, runtime_info.gs_info.num_invocations); + hash = HashCombine(hash, runtime_info.gs_info.output_vertices); + hash = HashCombine(hash, runtime_info.gs_info.in_vertex_data_size); + hash = HashCombine(hash, runtime_info.gs_info.out_vertex_data_size); + hash = HashCombine(hash, static_cast(runtime_info.gs_info.in_primitive)); + for (const auto& out_prim : runtime_info.gs_info.out_primitive) { + hash = HashCombine(hash, static_cast(out_prim)); + } + hash = HashCombine(hash, runtime_info.gs_info.vs_copy_hash); + break; + case Shader::Stage::Fragment: + hash = HashCombine(hash, runtime_info.fs_info.en_flags.raw); + hash = HashCombine(hash, runtime_info.fs_info.addr_flags.raw); + hash = HashCombine(hash, runtime_info.fs_info.num_inputs); + + for (u32 i = 0; + i < runtime_info.fs_info.num_inputs && i < runtime_info.fs_info.inputs.size(); ++i) { + const auto& input = runtime_info.fs_info.inputs[i]; + hash = HashCombine(hash, input.param_index); + hash = HashCombine(hash, input.is_default); + hash = HashCombine(hash, input.is_flat); + hash = HashCombine(hash, input.default_value); + } + + for (const auto& color_buffer : runtime_info.fs_info.color_buffers) { + hash = HashCombine(hash, static_cast(color_buffer.num_format)); + hash = HashCombine(hash, static_cast(color_buffer.num_conversion)); + hash = HashCombine(hash, static_cast(color_buffer.export_format)); + hash = HashCombine(hash, color_buffer.needs_unorm_fixup); + hash = HashCombine(hash, color_buffer.swizzle.r); + hash = HashCombine(hash, color_buffer.swizzle.g); + hash = HashCombine(hash, color_buffer.swizzle.b); + hash = HashCombine(hash, color_buffer.swizzle.a); + } + break; + case Shader::Stage::Compute: + hash = HashCombine(hash, runtime_info.cs_info.shared_memory_size); + for (u32 i = 0; i < 3; ++i) { + hash = HashCombine(hash, runtime_info.cs_info.workgroup_size[i]); + hash = HashCombine(hash, runtime_info.cs_info.tgid_enable[i]); + } + break; + } + + if (spec.fetch_shader_data) { + const auto& fetch_shader = *spec.fetch_shader_data; + hash = HashCombine(hash, fetch_shader.size); + hash = HashCombine(hash, static_cast(fetch_shader.vertex_offset_sgpr)); + hash = HashCombine(hash, static_cast(fetch_shader.instance_offset_sgpr)); + + for (const auto& attr : fetch_shader.attributes) { + hash = HashCombine(hash, static_cast(attr.semantic)); + hash = HashCombine(hash, static_cast(attr.dest_vgpr)); + hash = HashCombine(hash, static_cast(attr.num_elements)); + hash = HashCombine(hash, static_cast(attr.sgpr_base)); + hash = HashCombine(hash, static_cast(attr.dword_offset)); + hash = HashCombine(hash, static_cast(attr.instance_data)); + } + } + + for (const auto& vs_attrib : spec.vs_attribs) { + hash = HashCombine(hash, vs_attrib.num_components); + hash = HashCombine(hash, static_cast(vs_attrib.num_class)); + hash = HashCombine(hash, vs_attrib.dst_select.r); + hash = HashCombine(hash, vs_attrib.dst_select.g); + hash = HashCombine(hash, vs_attrib.dst_select.b); + hash = HashCombine(hash, vs_attrib.dst_select.a); + } + + const std::string bitset_str = spec.bitset.to_string(); + for (size_t i = 0; i < bitset_str.size(); i += 8) { + size_t end = std::min(i + 8, bitset_str.size()); + std::string chunk = bitset_str.substr(i, end - i); + u8 value = 0; + for (size_t j = 0; j < chunk.size(); ++j) { + if (chunk[j] == '1') { + value |= (1 << j); + } + } + hash = HashCombine(hash, value); + } + + for (const auto& buffer : spec.buffers) { + hash = HashCombine(hash, buffer.stride); + hash = HashCombine(hash, buffer.is_storage); + hash = HashCombine(hash, buffer.is_formatted); + hash = HashCombine(hash, buffer.swizzle_enable); + + if (buffer.is_formatted) { + hash = HashCombine(hash, buffer.data_format); + hash = HashCombine(hash, buffer.num_format); + hash = HashCombine(hash, buffer.dst_select.r); + hash = HashCombine(hash, buffer.dst_select.g); + hash = HashCombine(hash, buffer.dst_select.b); + hash = HashCombine(hash, buffer.dst_select.a); + hash = HashCombine(hash, static_cast(buffer.num_conversion)); + } + + if (buffer.swizzle_enable) { + hash = HashCombine(hash, buffer.index_stride); + hash = HashCombine(hash, buffer.element_size); + } + } + + for (const auto& image : spec.images) { + hash = HashCombine(hash, static_cast(image.type)); + hash = HashCombine(hash, image.is_integer); + hash = HashCombine(hash, image.is_storage); + hash = HashCombine(hash, image.is_cube); + + if (image.is_storage) { + hash = HashCombine(hash, image.dst_select.r); + hash = HashCombine(hash, image.dst_select.g); + hash = HashCombine(hash, image.dst_select.b); + hash = HashCombine(hash, image.dst_select.a); + } + + hash = HashCombine(hash, static_cast(image.num_conversion)); + } + + for (const auto& fmask : spec.fmasks) { + hash = HashCombine(hash, fmask.width); + hash = HashCombine(hash, fmask.height); + } + + for (const auto& sampler : spec.samplers) { + hash = HashCombine(hash, sampler.force_unnormalized); + } + + hash = HashCombine(hash, spec.start.buffer); + hash = HashCombine(hash, spec.start.unified); + hash = HashCombine(hash, spec.start.user_data); + + if (spec.info) { + hash = HashCombine(hash, spec.info->pgm_hash); + hash = HashCombine(hash, static_cast(spec.info->stage)); + hash = HashCombine(hash, static_cast(spec.info->l_stage)); + hash = HashCombine(hash, spec.info->has_storage_images); + hash = HashCombine(hash, spec.info->has_discard); + hash = HashCombine(hash, spec.info->has_image_gather); + hash = HashCombine(hash, spec.info->has_image_query); + hash = HashCombine(hash, spec.info->uses_lane_id); + hash = HashCombine(hash, spec.info->uses_group_quad); + hash = HashCombine(hash, spec.info->uses_group_ballot); + hash = HashCombine(hash, spec.info->uses_fp16); + hash = HashCombine(hash, spec.info->uses_fp64); + hash = HashCombine(hash, spec.info->uses_pack_10_11_11); + hash = HashCombine(hash, spec.info->uses_unpack_10_11_11); + hash = HashCombine(hash, spec.info->stores_tess_level_outer); + hash = HashCombine(hash, spec.info->stores_tess_level_inner); + hash = HashCombine(hash, spec.info->translation_failed); + hash = HashCombine(hash, spec.info->mrt_mask); + hash = HashCombine(hash, spec.info->has_fetch_shader); + hash = HashCombine(hash, spec.info->fetch_shader_sgpr_base); + + for (size_t i = 0; i < spec.info->loads.flags.size(); ++i) { + hash = HashCombine(hash, spec.info->loads.flags[i]); + } + + for (size_t i = 0; i < spec.info->stores.flags.size(); ++i) { + hash = HashCombine(hash, spec.info->stores.flags[i]); + } + + hash = HashCombine(hash, spec.info->ud_mask.mask); + + hash = HashCombine(hash, spec.info->uses_patches); + } + + return hash; +} + +void SerializeInfo(std::ostream& info_serialized, Shader::Info info) { + writeBin(info_serialized, info.ud_mask.mask); + + u32 bufferCount = static_cast(info.buffers.size()); + writeBin(info_serialized, bufferCount); // Buffer Amount + + for (const auto& buffer : info.buffers) { + writeBin(info_serialized, buffer.sharp_idx); + writeBin(info_serialized, static_cast(buffer.used_types)); + writeBin(info_serialized, static_cast(buffer.buffer_type)); + writeBin(info_serialized, buffer.instance_attrib); + writeBin(info_serialized, static_cast(buffer.is_written ? 1 : 0)); + writeBin(info_serialized, static_cast(buffer.is_formatted ? 1 : 0)); + + writeBin(info_serialized, buffer.inline_cbuf.base_address); + writeBin(info_serialized, buffer.inline_cbuf._padding0); + writeBin(info_serialized, buffer.inline_cbuf.stride); + writeBin(info_serialized, buffer.inline_cbuf.cache_swizzle); + writeBin(info_serialized, buffer.inline_cbuf.swizzle_enable); + writeBin(info_serialized, buffer.inline_cbuf.num_records); + writeBin(info_serialized, buffer.inline_cbuf.dst_sel_x); + writeBin(info_serialized, buffer.inline_cbuf.dst_sel_y); + writeBin(info_serialized, buffer.inline_cbuf.dst_sel_z); + writeBin(info_serialized, buffer.inline_cbuf.dst_sel_w); + writeBin(info_serialized, buffer.inline_cbuf.num_format); + writeBin(info_serialized, buffer.inline_cbuf.data_format); + writeBin(info_serialized, buffer.inline_cbuf.element_size); + writeBin(info_serialized, buffer.inline_cbuf.index_stride); + writeBin(info_serialized, buffer.inline_cbuf.add_tid_enable); + writeBin(info_serialized, buffer.inline_cbuf._padding1); + writeBin(info_serialized, buffer.inline_cbuf.type); + + } + + // Image-Resources + u32 imageCount = static_cast(info.images.size()); + writeBin(info_serialized, imageCount); // Image Amount + + for (const auto& image : info.images) { + writeBin(info_serialized, image.sharp_idx); + writeBin(info_serialized, static_cast(image.is_depth ? 1 : 0)); + writeBin(info_serialized, static_cast(image.is_atomic ? 1 : 0)); + writeBin(info_serialized, static_cast(image.is_array ? 1 : 0)); + writeBin(info_serialized, static_cast(image.is_written ? 1 : 0)); + } + + // Sampler-Resources + u32 samplerCount = static_cast(info.samplers.size()); + writeBin(info_serialized, samplerCount); // Sampler Amount + + for (const auto& sampler : info.samplers) { + if (std::holds_alternative(sampler.sampler)) + { + std::uint8_t tag = 0; + writeBin(info_serialized, tag); + + u32 sharp_idx = std::get(sampler.sampler); + writeBin(info_serialized, sharp_idx); + } + else + { + std::uint8_t tag = 1; + writeBin(info_serialized, tag); + + const AmdGpu::Sampler& hw_sampler = + std::get(sampler.sampler); + writeBin(info_serialized, hw_sampler); + } + + std::uint8_t packed = + static_cast((sampler.disable_aniso & 0x1) << 4) | + static_cast(sampler.associated_image & 0xF); + + writeBin(info_serialized, packed); + } + + // FMask-Resources + u32 fmaskCount = static_cast(info.fmasks.size()); + writeBin(info_serialized, fmaskCount); // FMask Amount + + for (const auto& fmask : info.fmasks) { + writeBin(info_serialized, fmask.sharp_idx); + } + + // GS Copy Data + u32 mapCount = static_cast(info.gs_copy_data.attr_map.size()); + writeBin(info_serialized, mapCount); + + for (auto const& [loc, attr_pair] : info.gs_copy_data.attr_map) { + writeBin(info_serialized, loc); + writeBin(info_serialized, static_cast(attr_pair.first)); + writeBin(info_serialized, attr_pair.second); + } + + // SRT Info + u32 srtCount = static_cast(info.srt_info.srt_reservations.size()); + writeBin(info_serialized, srtCount); + + for (const auto& res : info.srt_info.srt_reservations) { + writeBin(info_serialized, res.sgpr_base); + writeBin(info_serialized, res.dword_offset); + writeBin(info_serialized, res.num_dwords); + } + + writeBin(info_serialized, info.srt_info.flattened_bufsize_dw); + + // Flat UD + + u32 flatCount = static_cast(info.flattened_ud_buf.size()); + writeBin(info_serialized, flatCount); + + for (const auto& flat : info.flattened_ud_buf) { + writeBin(info_serialized, flat); + } + + // Tessellation Data + writeBin(info_serialized, info.tess_consts_ptr_base); + writeBin(info_serialized, info.tess_consts_dword_offset); + + // Flags + writeBin(info_serialized, static_cast(info.has_storage_images ? 1 : 0)); + writeBin(info_serialized, static_cast(info.has_discard ? 1 : 0)); + writeBin(info_serialized, static_cast(info.has_image_gather ? 1 : 0)); + writeBin(info_serialized, static_cast(info.has_image_query ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_lane_id ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_group_quad ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_group_ballot ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_fp16 ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_fp64 ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_pack_10_11_11 ? 1 : 0)); + writeBin(info_serialized, static_cast(info.uses_unpack_10_11_11 ? 1 : 0)); + writeBin(info_serialized, static_cast(info.stores_tess_level_outer ? 1 : 0)); + writeBin(info_serialized, static_cast(info.stores_tess_level_inner ? 1 : 0)); + writeBin(info_serialized, static_cast(info.translation_failed ? 1 : 0)); + + // MRT Mask + writeBin(info_serialized, info.mrt_mask); + + // Fetch + + writeBin(info_serialized, static_cast(info.has_fetch_shader ? 1 : 0)); + writeBin(info_serialized, info.fetch_shader_sgpr_base); + + // Stage + writeBin(info_serialized, info.stage); + writeBin(info_serialized, info.l_stage); + writeBin(info_serialized, info.pgm_hash); + + // AttributeFlags for loads + u32 loads_size = static_cast(info.loads.flags.size()); + writeBin(info_serialized, loads_size); + for (size_t i = 0; i < info.loads.flags.size(); ++i) { + writeBin(info_serialized, info.loads.flags[i]); + } + + // AttributeFlags for stores + u32 stores_size = static_cast(info.stores.flags.size()); + writeBin(info_serialized, stores_size); + for (size_t i = 0; i < info.stores.flags.size(); ++i) { + writeBin(info_serialized, info.stores.flags[i]); + } + + // UserData + u32 userDataSize = static_cast(info.user_data.size()); + writeBin(info_serialized, userDataSize); + for (size_t i = 0; i < info.user_data.size(); ++i) { + writeBin(info_serialized, info.user_data[i]); + } + + // Pgm Base + writeBin(info_serialized, info.pgm_base); +} + +void DeserializeInfo(std::istream& info_serialized, Shader::Info& info) { + // UD Mask + readBin(info_serialized, info.ud_mask.mask); + + // Buffer-Resources + u32 bufferCount; + readBin(info_serialized, bufferCount); + + info.buffers.clear(); + info.buffers.reserve(bufferCount); + for (u32 i = 0; i < bufferCount; ++i) { + Shader::BufferResource buffer; + readBin(info_serialized, buffer.sharp_idx); + u32 used_types; + readBin(info_serialized, used_types); + buffer.used_types = static_cast(used_types); + u32 buffer_type; + readBin(info_serialized, buffer_type); + buffer.buffer_type = static_cast(buffer_type); + readBin(info_serialized, buffer.instance_attrib); + u8 is_written; + readBin(info_serialized, is_written); + buffer.is_written = (is_written == 1); + u8 is_formatted; + readBin(info_serialized, is_formatted); + buffer.is_formatted = (is_formatted == 1); + + u64 base_address; + readBin(info_serialized, base_address); + buffer.inline_cbuf.base_address = base_address; + + u64 padding0; + readBin(info_serialized, padding0); + buffer.inline_cbuf._padding0 = padding0; + + u64 stride; + readBin(info_serialized, stride); + buffer.inline_cbuf.stride = stride; + + u64 cache_swizzle; + readBin(info_serialized, cache_swizzle); + buffer.inline_cbuf.cache_swizzle = cache_swizzle; + + u64 swizzle_enable; + readBin(info_serialized, swizzle_enable); + buffer.inline_cbuf.swizzle_enable = swizzle_enable; + + readBin(info_serialized, buffer.inline_cbuf.num_records); + + u32 dst_sel_x; + readBin(info_serialized, dst_sel_x); + buffer.inline_cbuf.dst_sel_x = dst_sel_x; + + u32 dst_sel_y; + readBin(info_serialized, dst_sel_y); + buffer.inline_cbuf.dst_sel_y = dst_sel_y; + + u32 dst_sel_z; + readBin(info_serialized, dst_sel_z); + buffer.inline_cbuf.dst_sel_z = dst_sel_z; + + u32 dst_sel_w; + readBin(info_serialized, dst_sel_w); + buffer.inline_cbuf.dst_sel_w = dst_sel_w; + + u32 num_format; + readBin(info_serialized, num_format); + buffer.inline_cbuf.num_format = num_format; + + u32 data_format; + readBin(info_serialized, data_format); + buffer.inline_cbuf.data_format = data_format; + + u32 element_size; + readBin(info_serialized, element_size); + buffer.inline_cbuf.element_size = element_size; + + u32 index_stride; + readBin(info_serialized, index_stride); + buffer.inline_cbuf.index_stride = index_stride; + + u32 add_tid_enable; + readBin(info_serialized, add_tid_enable); + buffer.inline_cbuf.add_tid_enable = add_tid_enable; + + u32 padding1; + readBin(info_serialized, padding1); + buffer.inline_cbuf._padding1 = padding1; + + u32 type; + readBin(info_serialized, type); + buffer.inline_cbuf.type = type; + + info.buffers.push_back(std::move(buffer)); + } + + // Image-Resources + u32 imageCount; + readBin(info_serialized, imageCount); + + info.images.clear(); + info.images.reserve(imageCount); + for (u32 i = 0; i < imageCount; ++i) { + Shader::ImageResource image; + readBin(info_serialized, image.sharp_idx); + u8 is_depth; + readBin(info_serialized, is_depth); + image.is_depth = (is_depth == 1); + u8 is_atomic; + readBin(info_serialized, is_atomic); + image.is_atomic = (is_atomic == 1); + u8 is_array; + readBin(info_serialized, is_array); + image.is_array = (is_array == 1); + u8 is_written; + readBin(info_serialized, is_written); + image.is_written = (is_written == 1); + info.images.push_back(std::move(image)); + } + + // Sampler-Resources + u32 samplerCount; + readBin(info_serialized, samplerCount); + + info.samplers.clear(); + info.samplers.reserve(samplerCount); + for (u32 i = 0; i < samplerCount; ++i) + { + std::uint8_t tag; + readBin(info_serialized, tag); + + Shader::SamplerResource sampler{0, 0, false}; // Dummy-Init + + if (tag == 0) + { + u32 sharp_idx; + readBin(info_serialized, sharp_idx); + sampler.sampler = sharp_idx; + } + else + { + AmdGpu::Sampler hw_sampler; + readBin(info_serialized, hw_sampler); + sampler.sampler = hw_sampler; + } + + std::uint8_t packed; + readBin(info_serialized, packed); + + sampler.associated_image = packed & 0xF; + sampler.disable_aniso = (packed >> 4) & 0x1; + + info.samplers.push_back(std::move(sampler)); + } + + // FMask-Resources + u32 fmaskCount; + readBin(info_serialized, fmaskCount); + + info.fmasks.clear(); + info.fmasks.reserve(fmaskCount); + for (u32 i = 0; i < fmaskCount; ++i) { + Shader::FMaskResource fmask; + readBin(info_serialized, fmask.sharp_idx); + info.fmasks.push_back(std::move(fmask)); + } + + // GS Copy Data + u32 mapCount; + readBin(info_serialized, mapCount); + + info.gs_copy_data.attr_map.clear(); + for (u32 i = 0; i < mapCount; ++i) { + u32 loc; + u32 attribute_value; + u32 idx; + readBin(info_serialized, loc); + readBin(info_serialized, attribute_value); + readBin(info_serialized, idx); + Shader::IR::Attribute attribute = static_cast(attribute_value); + info.gs_copy_data.attr_map.emplace(loc, std::make_pair(attribute, idx)); + } + + // SRT Info + u32 srtCount; + readBin(info_serialized, srtCount); + + info.srt_info.srt_reservations.clear(); + info.srt_info.srt_reservations.resize(srtCount); + for (u32 i = 0; i < srtCount; ++i) { + auto& res = info.srt_info.srt_reservations[i]; + readBin(info_serialized, res.sgpr_base); + readBin(info_serialized, res.dword_offset); + readBin(info_serialized, res.num_dwords); + } + + readBin(info_serialized, info.srt_info.flattened_bufsize_dw); + + // Flat UD + + u32 flatCount; + readBin(info_serialized, flatCount); + + info.flattened_ud_buf.clear(); + u32 required_size = std::max(flatCount, info.srt_info.flattened_bufsize_dw); + info.flattened_ud_buf.resize(required_size); + + for (u32 i = 0; i < flatCount; ++i) { + readBin(info_serialized, info.flattened_ud_buf[i]); + } + + // Tessellation Data + readBin(info_serialized, info.tess_consts_ptr_base); + readBin(info_serialized, info.tess_consts_dword_offset); + + // Flags + u8 flag_value; + readBin(info_serialized, flag_value); + info.has_storage_images = (flag_value == 1); + readBin(info_serialized, flag_value); + info.has_discard = (flag_value == 1); + readBin(info_serialized, flag_value); + info.has_image_gather = (flag_value == 1); + readBin(info_serialized, flag_value); + info.has_image_query = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_lane_id = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_group_quad = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_group_ballot = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_fp16 = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_fp64 = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_pack_10_11_11 = (flag_value == 1); + readBin(info_serialized, flag_value); + info.uses_unpack_10_11_11 = (flag_value == 1); + readBin(info_serialized, flag_value); + info.stores_tess_level_outer = (flag_value == 1); + readBin(info_serialized, flag_value); + info.stores_tess_level_inner = (flag_value == 1); + readBin(info_serialized, flag_value); + info.translation_failed = (flag_value == 1); + + // MRT Mask + readBin(info_serialized, info.mrt_mask); + + // Fetch Shader + u8 has_fetch_shader; + readBin(info_serialized, has_fetch_shader); + info.has_fetch_shader = (has_fetch_shader == 1); + readBin(info_serialized, info.fetch_shader_sgpr_base); + + // Stage + readBin(info_serialized, info.stage); + readBin(info_serialized, info.l_stage); + readBin(info_serialized, info.pgm_hash); + + // AttributeFlags for loads + u32 loads_size; + readBin(info_serialized, loads_size); + for (size_t i = 0; i < loads_size; ++i) { + readBin(info_serialized, info.loads.flags[i]); + } + + // AttributeFlags for stores + u32 stores_size; + readBin(info_serialized, stores_size); + for (size_t i = 0; i < stores_size; ++i) { + readBin(info_serialized, info.stores.flags[i]); + } + + // UserData + u32 userDataSize; + readBin(info_serialized, userDataSize); + + static std::vector temp_user_data_storage; + temp_user_data_storage.clear(); + temp_user_data_storage.resize(userDataSize); + + for (u32 i = 0; i < userDataSize; ++i) { + readBin(info_serialized, temp_user_data_storage[i]); + } + + info.user_data = std::span(temp_user_data_storage); + + // Pgm Base + readBin(info_serialized, info.pgm_base); + + + // Check if there are any remaining bytes in the stream + if (info_serialized.peek() != EOF) { + LOG_WARNING(Render_Vulkan, "There are remaining bytes in the cache file"); + } +} + +bool CheckShaderCache(std::string shader_id) { + std::filesystem::path spirv_cache_file_path = shader_cache_dir / (shader_id + ".spv"); + std::filesystem::path resources_file_path = shader_cache_dir / (shader_id + ".resources"); + + if (!std::filesystem::exists(spirv_cache_file_path)) { + LOG_DEBUG(Render_Vulkan, "SPIR-V-Datei nicht gefunden: {}", spirv_cache_file_path.string()); + return false; + } + + if (!std::filesystem::exists(resources_file_path)) { + LOG_DEBUG(Render_Vulkan, "Ressourcendatei nicht gefunden: {}", + resources_file_path.string()); + return false; + } + + Common::FS::IOFile spirv_file(spirv_cache_file_path, Common::FS::FileAccessMode::Read); + Common::FS::IOFile resources_file(resources_file_path, Common::FS::FileAccessMode::Read); + + const bool spirv_valid = spirv_file.IsOpen() && spirv_file.GetSize() > 0; + const bool resources_valid = resources_file.IsOpen() && resources_file.GetSize() > 0; + + spirv_file.Close(); + resources_file.Close(); + + if (!spirv_valid || !resources_valid) { + LOG_WARNING(Render_Vulkan, "Invalid cache file for shader with ID: {}", shader_id); + if (std::filesystem::exists(spirv_cache_file_path)) { + std::filesystem::remove(spirv_cache_file_path); + } + if (std::filesystem::exists(resources_file_path)) { + std::filesystem::remove(resources_file_path); + } + return false; + } + + LOG_INFO(Render_Vulkan, "Found shader with ID {} in the cache", shader_id); + return true; +} + +void GetShader(std::string shader_id, Shader::Info& info, std::vector& spv) { + std::string spirv_cache_filename = shader_id + ".spv"; + std::filesystem::path spirv_cache_file_path = shader_cache_dir / spirv_cache_filename; + Common::FS::IOFile spirv_cache_file(spirv_cache_file_path, + Common::FS::FileAccessMode::Read); + spv.resize(spirv_cache_file.GetSize() / sizeof(u32)); + spirv_cache_file.Read(spv); + spirv_cache_file.Close(); + + std::filesystem::path resources_dump_file_path = shader_cache_dir / (shader_id + ".resources"); + Common::FS::IOFile resources_dump_file(resources_dump_file_path, + Common::FS::FileAccessMode::Read); + + std::vector resources_data; + resources_data.resize(resources_dump_file.GetSize()); + resources_dump_file.Read(resources_data); + resources_dump_file.Close(); + + std::istringstream combined_stream(std::string(resources_data.begin(), resources_data.end())); + + std::istringstream info_stream; + info_stream.str(std::string(resources_data.begin(), resources_data.end())); + DeserializeInfo(info_stream, info); + +} + +void AddShader(std::string shader_id, std::vector spv, std::ostream& info_serialized) { + std::string spirv_cache_filename = shader_id + ".spv"; + std::filesystem::path spirv_cache_file_path = shader_cache_dir / spirv_cache_filename; + Common::FS::IOFile shader_cache_file(spirv_cache_file_path, Common::FS::FileAccessMode::Write); + shader_cache_file.WriteSpan(std::span(spv)); + shader_cache_file.Close(); + + std::filesystem::path resources_dump_file_path = shader_cache_dir / (shader_id + ".resources"); + Common::FS::IOFile resources_dump_file(resources_dump_file_path, + Common::FS::FileAccessMode::Write); + + if (std::ostringstream* info_oss = dynamic_cast(&info_serialized)) { + std::string info_data = info_oss->str(); + resources_dump_file.WriteSpan(std::span(info_data.data(), info_data.size())); + } + + resources_dump_file.Close(); +} + +} // namespace ShaderCache \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/shader_cache.h b/src/video_core/renderer_vulkan/shader_cache.h new file mode 100644 index 000000000..d2320e455 --- /dev/null +++ b/src/video_core/renderer_vulkan/shader_cache.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include +#include +#include "shader_recompiler/info.h" +#include + +namespace ShaderCache { + +u64 CalculateSpecializationHash(const Shader::StageSpecialization& spec); +void SerializeInfo( + std::ostream& info_serialized, Shader::Info info); +void DeserializeInfo(std::istream& info_serialized, Shader::Info& info); + +bool CheckShaderCache(std::string shader_id); +void GetShader(std::string shader_id, Shader::Info& info, std::vector& spv); +void AddShader(std::string shader_id, std::vector spv, std::ostream& info_serialized); + +} // namespace ShaderCache diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7dd468f9a..0e54c46d0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/renderer_vulkan/shader_cache.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_presenter.h" @@ -511,14 +512,37 @@ bool PipelineCache::RefreshComputeKey() { vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, - Shader::Backend::Bindings& binding) { + Shader::Backend::Bindings& binding, Shader::StageSpecialization spec) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? "(permutation)" : ""); - DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); - auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); + + std::string shader_name = GetShaderName(info.stage, info.pgm_hash, perm_idx); + + std::vector spv; + std::string shader_id = std::to_string(::ShaderCache::CalculateSpecializationHash(spec)); + if (::ShaderCache::CheckShaderCache(shader_id)) { + LOG_INFO(Render_Vulkan, "Loaded shader {} {:#x} {} from cache", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation)" : ""); + ::ShaderCache::GetShader(shader_id, info, spv); + info.RefreshFlatBuf(); + } else { + LOG_INFO(Render_Vulkan, "Shader {} {:#x} {} not in cache", info.stage, + info.pgm_hash, perm_idx != 0 ? "(permutation)" : ""); + const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); + spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); + std::ostringstream info_serialized; + ::ShaderCache::SerializeInfo(info_serialized, info); + + ::ShaderCache::AddShader(shader_id, spv, info_serialized); + LOG_INFO(Render_Vulkan, "Shader ID: {}", shader_id); + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + + + LOG_INFO(Render_Vulkan, "Compiled shader {} {:#x} {} and saved it to cache", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation)" : ""); + } vk::ShaderModule module; @@ -533,6 +557,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim const auto name = GetShaderName(info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); + if (Config::collectShadersForDebug()) { DebugState.CollectShader(name, info.l_stage, module, spv, code, patch ? *patch : std::span{}, is_patched); @@ -544,13 +569,14 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag Shader::ShaderParams params, Shader::Backend::Bindings& binding) { auto runtime_info = BuildRuntimeInfo(stage, l_stage); - auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); + auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); // code in vs if (new_program) { it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; - const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); + Shader::StageSpecialization spec = + Shader::StageSpecialization(program->info, runtime_info, profile, start); + const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding, spec); program->AddPermut(module, std::move(spec)); return std::make_tuple(&program->info, module, spec.fetch_shader_data, HashCombine(params.hash, 0)); @@ -567,7 +593,7 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { auto new_info = Shader::Info(stage, l_stage, params); - module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding, spec); program->AddPermut(module, std::move(spec)); } else { info.AddBindings(binding); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ba3407b48..c20d06a7b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -82,7 +82,7 @@ private: std::string_view ext); vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, - Shader::Backend::Bindings& binding); + Shader::Backend::Bindings& binding, Shader::StageSpecialization spec); const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: