shader_jit: Compile nested loops

and use `T_NEAR` instead of the default in Compile_BREAKC
This commit is contained in:
SachinVin 2022-04-24 00:30:36 +05:30
parent 1382035d4d
commit 047e238d09
3 changed files with 155 additions and 31 deletions

View file

@ -7,28 +7,28 @@
#include <memory>
#include <catch2/catch.hpp>
#include <nihstro/inline_assembly.h>
#include "video_core/shader/shader_interpreter.h"
#include "video_core/shader/shader_jit_x64_compiler.h"
using float24 = Pica::float24;
using JitShader = Pica::Shader::JitShader;
using ShaderInterpreter = Pica::Shader::InterpreterEngine;
using DestRegister = nihstro::DestRegister;
using OpCode = nihstro::OpCode;
using SourceRegister = nihstro::SourceRegister;
using Type = nihstro::InlineAsm::Type;
// NOTE(review): this hunk is rendered without +/- markers, so lines of the
// previous helper (CompileShader) and of its replacement (CompileShaderSetup)
// appear interleaved below — presumably only the CompileShaderSetup variant
// exists after the commit; confirm against the real file.
//
// CompileShaderSetup assembles `code` with nihstro and returns a heap-allocated
// ShaderSetup whose program_code / swizzle_data hold the `.hex` word of every
// assembled instruction / swizzle entry. It does not compile the JIT itself.
static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::InlineAsm> code) {
static std::unique_ptr<Pica::Shader::ShaderSetup> CompileShaderSetup(
std::initializer_list<nihstro::InlineAsm> code) {
// Assemble the inline-assembly listing into raw program words + swizzle table.
const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code);
// Zero-initialised local buffers; only referenced by the CompileShader lines.
std::array<u32, Pica::Shader::MAX_PROGRAM_CODE_LENGTH> program_code{};
std::array<u32, Pica::Shader::MAX_SWIZZLE_DATA_LENGTH> swizzle_data{};
auto shader = std::make_unique<Pica::Shader::ShaderSetup>();
// Copy the raw 32-bit encoding (`hex`) of each program word.
std::transform(shbin.program.begin(), shbin.program.end(), program_code.begin(),
std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(),
[](const auto& x) { return x.hex; });
// Copy the raw encoding of each swizzle pattern.
std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), swizzle_data.begin(),
[](const auto& x) { return x.hex; });
auto shader = std::make_unique<JitShader>();
shader->Compile(&program_code, &swizzle_data);
std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(),
shader->swizzle_data.begin(), [](const auto& x) { return x.hex; });
return shader;
}
@ -36,19 +36,32 @@ static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::I
// Test fixture that owns one shader program (a ShaderSetup) together with a
// JIT engine and an interpreter engine, so a test can run the same program
// through both back ends and compare the resulting UnitState.
//
// NOTE(review): this hunk is rendered without +/- markers; the lines that
// refer to a `shader` member / CompileShader, and the local `shader_setup`
// inside Run, presumably belong to the pre-commit version — confirm against
// the real file.
class ShaderTest {
public:
// Builds the ShaderSetup from `code` and JIT-compiles its program and swizzle
// data once, up front.
explicit ShaderTest(std::initializer_list<nihstro::InlineAsm> code)
: shader(CompileShader(code)) {}
: shader_setup(CompileShaderSetup(code)) {
shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data);
}
// Runs the JIT-compiled shader on a fresh UnitState with `input` in input
// register 0 (x component) and returns output register 0 (x) as a float.
float Run(float input) {
Pica::Shader::ShaderSetup shader_setup;
Pica::Shader::UnitState shader_unit;
shader_unit.registers.input[0].x = float24::FromFloat32(input);
shader->Run(shader_setup, shader_unit, 0);
RunJit(shader_unit, input);
return shader_unit.registers.output[0].x.ToFloat32();
}
// Executes the program with the JIT on the caller-provided `shader_unit`.
// input[0].x is set to `input` and temporary[0].x is reset to 0 before running.
// The trailing 0 is presumably the entry-point offset — confirm against
// JitShader::Run.
void RunJit(Pica::Shader::UnitState& shader_unit, float input) {
shader_unit.registers.input[0].x = float24::FromFloat32(input);
shader_unit.registers.temporary[0].x = float24::FromFloat32(0);
shader_jit.Run(*shader_setup, shader_unit, 0);
}
// Executes the same program with the interpreter on `shader_unit`, using the
// same register initialisation as RunJit so the two results are comparable.
void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) {
shader_unit.registers.input[0].x = float24::FromFloat32(input);
shader_unit.registers.temporary[0].x = float24::FromFloat32(0);
shader_interpreter.Run(*shader_setup, shader_unit);
}
// Members are public so tests can adjust shader_setup (e.g. its uniforms)
// after construction.
public:
std::unique_ptr<JitShader> shader;
JitShader shader_jit;
ShaderInterpreter shader_interpreter;
std::unique_ptr<Pica::Shader::ShaderSetup> shader_setup;
};
TEST_CASE("LG2", "[video_core][shader][shader_jit]") {
@ -89,3 +102,108 @@ TEST_CASE("EX2", "[video_core][shader][shader_jit]") {
REQUIRE(shader.Run(79.7262742773f) == Approx(1.e24f));
REQUIRE(std::isinf(shader.Run(800.f)));
}
// Checks that a LOOP nested inside another LOOP produces the same result when
// run through the JIT and through the interpreter.
//
// The shader accumulates the input into temporary register 0 once per inner
// iteration and finally copies the accumulator to output register 0.
TEST_CASE("Nested Loop", "[video_core][shader][shader_jit]") {
    const auto sh_input = SourceRegister::MakeInput(0);
    const auto sh_temp = SourceRegister::MakeTemporary(0);
    const auto sh_output = DestRegister::MakeOutput(0);

    // One parameter vector per LOOP instruction, indexed by the integer uniform
    // the instruction refers to. Components are presumably
    // {repeat count, initial counter, counter increment, unused} — confirm
    // against the PICA LOOP documentation.
    const std::array<Common::Vec4<u8>, 2> loop_parms{Common::Vec4<u8>{4, 0, 1, 0},
                                                     Common::Vec4<u8>{4, 0, 1, 0}};

    auto shader_test = ShaderTest({
        // clang-format off
        {OpCode::Id::LOOP, 0},
            {OpCode::Id::LOOP, 1},
                {OpCode::Id::ADD, sh_temp, sh_temp, sh_input},
            {Type::EndLoop},
        {Type::EndLoop},
        {OpCode::Id::MOV, sh_output, sh_temp},
        {OpCode::Id::END},
        // clang-format on
    });

    // Each loop gets its own entry; previously both uniforms were filled from
    // loop_parms[0], which only worked because the two entries are equal.
    shader_test.shader_setup->uniforms.i[0] = loop_parms[0];
    shader_test.shader_setup->uniforms.i[1] = loop_parms[1];

    // Runs the program through both back ends with the same input and requires
    // matching output[0].x and matching address_registers[2].
    const auto run_test_helper = [&shader_test](float input) {
        Pica::Shader::UnitState shader_unit_jit;
        Pica::Shader::UnitState shader_unit_inter;
        shader_test.RunJit(shader_unit_jit, input);
        shader_test.RunInterpreter(shader_unit_inter, input);

        REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() ==
                Approx(shader_unit_inter.registers.output[0].x.ToFloat32()));
        REQUIRE(shader_unit_jit.address_registers[2] == shader_unit_inter.address_registers[2]);
    };

    {
        // Sanity check against hard-coded expectations, so that a bug shared by
        // both back ends cannot hide behind the JIT-vs-interpreter comparison.
        // With the parameters above the input is accumulated 25 times.
        Pica::Shader::UnitState shader_unit_jit;
        shader_test.RunJit(shader_unit_jit, 1.0f);
        REQUIRE(shader_unit_jit.address_registers[2] == 6);
        REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == Approx(25.0f));

        Pica::Shader::UnitState shader_unit_inter;
        shader_test.RunInterpreter(shader_unit_inter, 2.0f);
        REQUIRE(shader_unit_inter.address_registers[2] == 6);
        REQUIRE(shader_unit_inter.registers.output[0].x.ToFloat32() == Approx(50.0f));
    }

    run_test_helper(-5.f);
    run_test_helper(0.f);
    run_test_helper(2.f);
    run_test_helper(6.f);
    run_test_helper(79.7262742773f);
}
// Runs a four-level nested LOOP with randomly generated loop parameters and a
// random input, and requires the JIT and the interpreter to agree on
// output[0].x and on address_registers[2].
TEST_CASE("Nested Loop Randomized", "[video_core][shader][shader_jit]") {
    const auto sh_input = SourceRegister::MakeInput(0);
    const auto sh_temp = SourceRegister::MakeTemporary(0);
    const auto sh_output = DestRegister::MakeOutput(0);

    auto shader_test = ShaderTest({
        // clang-format off
        {OpCode::Id::LOOP, 0},
            {OpCode::Id::LOOP, 1},
                {OpCode::Id::LOOP, 2},
                    {OpCode::Id::LOOP, 3},
                        {OpCode::Id::ADD, sh_temp, sh_temp, sh_input},
                    {Type::EndLoop},
                {Type::EndLoop},
            {Type::EndLoop},
        {Type::EndLoop},
        {OpCode::Id::MOV, sh_output, sh_temp},
        {OpCode::Id::END},
        // clang-format on
    });

    // Produces one random parameter vector {iterations, initial, increment, 0}.
    const auto generate_loop_parms = [] {
        // The +1 is done in unsigned arithmetic: `1 + rand()` on int overflows
        // (undefined behaviour) when rand() returns RAND_MAX == INT_MAX. The
        // value after truncation to u8 is the same as before.
        const auto random_u8 = [] {
            return static_cast<u8>(1u + static_cast<unsigned>(rand()));
        };
        // Separate statements keep the order of the rand() calls well defined.
        const u8 iterations = random_u8();
        const u8 initial = random_u8();
        const u8 increment = random_u8();
        return Common::Vec4<u8>{iterations, initial, increment, 0};
    };

    // Runs the program through both back ends with the same input and requires
    // matching output[0].x and matching address_registers[2].
    const auto run_test_helper = [&shader_test](float input) {
        Pica::Shader::UnitState shader_unit_jit;
        Pica::Shader::UnitState shader_unit_inter;
        shader_test.RunJit(shader_unit_jit, input);
        shader_test.RunInterpreter(shader_unit_inter, input);

        REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() ==
                Approx(shader_unit_inter.registers.output[0].x.ToFloat32()));
        REQUIRE(shader_unit_jit.address_registers[2] == shader_unit_inter.address_registers[2]);
    };

    // Seed from the clock, but report the seed on failure so that a failing
    // run can be reproduced.
    const auto seed = static_cast<unsigned>(time(nullptr));
    INFO("srand seed: " << seed);
    srand(seed);

    // NOTE(review): every level may receive a count of up to 255, so the total
    // number of inner iterations can presumably become very large and make this
    // test slow — consider bounding `iterations`.
    for (int i = 0; i < 10; i++) {
        shader_test.shader_setup->uniforms.i[0] = generate_loop_parms();
        shader_test.shader_setup->uniforms.i[1] = generate_loop_parms();
        shader_test.shader_setup->uniforms.i[2] = generate_loop_parms();
        shader_test.shader_setup->uniforms.i[3] = generate_loop_parms();

        // Integer-valued input roughly centred on zero.
        const float input = static_cast<float>(rand() - RAND_MAX / 2);
        run_test_helper(input);
    }
}