gl_shader_cache: Rework shader cache and remove post-specializations
Instead of pre-specializing shaders and then post-specializing them, drop the latter and only "specialize" the shader while decoding it.
This commit is contained in:
parent
22e825a3bc
commit
bd8b9bbcee
19 changed files with 548 additions and 1100 deletions
|
@ -14,8 +14,9 @@ namespace VideoCommon::Shader {
|
|||
|
||||
using Tegra::Engines::SamplerDescriptor;
|
||||
|
||||
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
|
||||
: stage{shader_stage} {}
|
||||
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
|
||||
VideoCore::GuestDriverProfile stored_guest_driver_profile)
|
||||
: stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {}
|
||||
|
||||
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
|
||||
Tegra::Engines::ConstBufferEngineInterface& engine)
|
||||
|
@ -97,7 +98,7 @@ void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
|
|||
|
||||
bool ConstBufferLocker::IsConsistent() const {
|
||||
if (!engine) {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
return std::all_of(keys.begin(), keys.end(),
|
||||
[this](const auto& pair) {
|
||||
|
|
|
@ -26,7 +26,8 @@ using BindlessSamplerMap =
|
|||
*/
|
||||
class ConstBufferLocker {
|
||||
public:
|
||||
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
|
||||
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
|
||||
VideoCore::GuestDriverProfile stored_guest_driver_profile);
|
||||
|
||||
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
|
||||
Tegra::Engines::ConstBufferEngineInterface& engine);
|
||||
|
@ -83,15 +84,13 @@ public:
|
|||
}
|
||||
|
||||
/// Obtains access to the guest driver's profile.
|
||||
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
|
||||
if (engine) {
|
||||
return &engine->AccessGuestDriverProfile();
|
||||
}
|
||||
return nullptr;
|
||||
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
|
||||
return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
|
||||
}
|
||||
|
||||
private:
|
||||
const Tegra::Engines::ShaderType stage;
|
||||
VideoCore::GuestDriverProfile stored_guest_driver_profile;
|
||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||
KeyMap keys;
|
||||
BoundSamplerMap bound_samplers;
|
||||
|
|
|
@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
|
|||
return (absolute_offset % SchedPeriod) == 0;
|
||||
}
|
||||
|
||||
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
|
||||
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
|
||||
const std::list<Sampler>& used_samplers) {
|
||||
if (gpu_driver == nullptr) {
|
||||
LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
|
||||
return;
|
||||
}
|
||||
if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
|
||||
if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
|
||||
return;
|
||||
}
|
||||
u32 count{};
|
||||
|
@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
|
|||
bound_offsets.emplace_back(sampler.GetOffset());
|
||||
}
|
||||
if (count > 1) {
|
||||
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
|
||||
gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
|
||||
VideoCore::GuestDriverProfile* gpu_driver,
|
||||
VideoCore::GuestDriverProfile& gpu_driver,
|
||||
const std::list<Sampler>& used_samplers) {
|
||||
if (gpu_driver == nullptr) {
|
||||
LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
|
||||
return std::nullopt;
|
||||
}
|
||||
const u32 base_offset = sampler_to_deduce.GetOffset();
|
||||
u32 max_offset{std::numeric_limits<u32>::max()};
|
||||
for (const auto& sampler : used_samplers) {
|
||||
|
@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
|
|||
if (max_offset == std::numeric_limits<u32>::max()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
|
||||
return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
|
|
@ -94,13 +94,10 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
|
|||
}
|
||||
auto [gpr, base_offset] = *pair;
|
||||
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
|
||||
auto gpu_driver = locker.AccessGuestDriverProfile();
|
||||
if (gpu_driver == nullptr) {
|
||||
return {};
|
||||
}
|
||||
const auto& gpu_driver = locker.AccessGuestDriverProfile();
|
||||
const u32 bindless_cv = NewCustomVariable();
|
||||
const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
|
||||
Immediate(gpu_driver->GetTextureHandlerSize()));
|
||||
const Node op =
|
||||
Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
|
||||
|
||||
const Node cv_node = GetCustomVariable(bindless_cv);
|
||||
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue