Sources: Run clang-format on everything.
This commit is contained in:
parent
fe948af095
commit
dc8479928c
386 changed files with 19560 additions and 18080 deletions
|
@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
|
|||
|
||||
const auto& output_register_map = g_state.regs.vs_output_attributes[index];
|
||||
|
||||
u32 semantics[4] = {
|
||||
output_register_map.map_x, output_register_map.map_y,
|
||||
output_register_map.map_z, output_register_map.map_w
|
||||
};
|
||||
u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
|
||||
output_register_map.map_z, output_register_map.map_w};
|
||||
|
||||
for (unsigned comp = 0; comp < 4; ++comp) {
|
||||
float24* out = ((float24*)&ret) + semantics[comp];
|
||||
|
@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
|
|||
index++;
|
||||
}
|
||||
|
||||
// The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
|
||||
// The hardware takes the absolute and saturates vertex colors like this, *before* doing
|
||||
// interpolation
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
ret.color[i] = float24::FromFloat32(
|
||||
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
|
||||
ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
|
||||
}
|
||||
|
||||
LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
|
||||
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
|
||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
||||
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
|
||||
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
|
||||
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
|
||||
ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
|
||||
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
|
||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
|
||||
ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
|
||||
ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
|
||||
ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
|
||||
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
|
||||
ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -96,8 +95,9 @@ void ClearCache() {
|
|||
void ShaderSetup::Setup() {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
if (VideoCore::g_shader_jit_enabled) {
|
||||
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
|
||||
u64 cache_key =
|
||||
(Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
|
||||
|
||||
auto iter = shader_map.find(cache_key);
|
||||
if (iter != shader_map.end()) {
|
||||
|
@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
|
|||
const auto& attribute_register_map = config.input_register_map;
|
||||
|
||||
for (unsigned i = 0; i < num_attributes; i++)
|
||||
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
|
||||
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
|
||||
|
||||
state.conditional_code[0] = false;
|
||||
state.conditional_code[1] = false;
|
||||
|
@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
|
|||
#else
|
||||
RunInterpreter(setup, state, config.main_offset);
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
|
||||
}
|
||||
|
||||
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
|
||||
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
||||
const Regs::ShaderConfig& config,
|
||||
const ShaderSetup& setup) {
|
||||
UnitState<true> state;
|
||||
|
||||
state.debug.max_offset = 0;
|
||||
|
@ -155,7 +156,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
|
|||
boost::fill(state.registers.input, &dummy_register);
|
||||
|
||||
for (unsigned i = 0; i < num_attributes; i++)
|
||||
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
|
||||
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
|
||||
|
||||
state.conditional_code[0] = false;
|
||||
state.conditional_code[1] = false;
|
||||
|
|
|
@ -94,46 +94,46 @@ struct OutputRegisters {
|
|||
static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
|
||||
|
||||
// Helper structure used to keep track of data useful for inspection of shader emulation
|
||||
template<bool full_debugging>
|
||||
template <bool full_debugging>
|
||||
struct DebugData;
|
||||
|
||||
template<>
|
||||
template <>
|
||||
struct DebugData<false> {
|
||||
// TODO: Hide these behind and interface and move them to DebugData<true>
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||
};
|
||||
|
||||
template<>
|
||||
template <>
|
||||
struct DebugData<true> {
|
||||
// Records store the input and output operands of a particular instruction.
|
||||
struct Record {
|
||||
enum Type {
|
||||
// Floating point arithmetic operands
|
||||
SRC1 = 0x1,
|
||||
SRC2 = 0x2,
|
||||
SRC3 = 0x4,
|
||||
SRC1 = 0x1,
|
||||
SRC2 = 0x2,
|
||||
SRC3 = 0x4,
|
||||
|
||||
// Initial and final output operand value
|
||||
DEST_IN = 0x8,
|
||||
DEST_OUT = 0x10,
|
||||
DEST_IN = 0x8,
|
||||
DEST_OUT = 0x10,
|
||||
|
||||
// Current and next instruction offset (in words)
|
||||
CUR_INSTR = 0x20,
|
||||
NEXT_INSTR = 0x40,
|
||||
CUR_INSTR = 0x20,
|
||||
NEXT_INSTR = 0x40,
|
||||
|
||||
// Output address register value
|
||||
ADDR_REG_OUT = 0x80,
|
||||
|
||||
// Result of a comparison instruction
|
||||
CMP_RESULT = 0x100,
|
||||
CMP_RESULT = 0x100,
|
||||
|
||||
// Input values for conditional flow control instructions
|
||||
COND_BOOL_IN = 0x200,
|
||||
COND_CMP_IN = 0x400,
|
||||
COND_CMP_IN = 0x400,
|
||||
|
||||
// Input values for a loop
|
||||
LOOP_INT_IN = 0x800,
|
||||
LOOP_INT_IN = 0x800,
|
||||
};
|
||||
|
||||
Math::Vec4<float24> src1;
|
||||
|
@ -156,7 +156,7 @@ struct DebugData<true> {
|
|||
unsigned mask = 0;
|
||||
};
|
||||
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_offset; // maximum program counter ever reached
|
||||
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||
|
||||
// List of records for each executed shader instruction
|
||||
|
@ -167,10 +167,10 @@ struct DebugData<true> {
|
|||
using DebugDataRecord = DebugData<true>::Record;
|
||||
|
||||
// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
|
||||
template<DebugDataRecord::Type type, typename ValueType>
|
||||
template <DebugDataRecord::Type type, typename ValueType>
|
||||
inline void SetField(DebugDataRecord& record, ValueType value);
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
|
||||
record.src1.x = value[0];
|
||||
record.src1.y = value[1];
|
||||
|
@ -178,7 +178,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
|
|||
record.src1.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
|
||||
record.src2.x = value[0];
|
||||
record.src2.y = value[1];
|
||||
|
@ -186,7 +186,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
|
|||
record.src2.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
|
||||
record.src3.x = value[0];
|
||||
record.src3.y = value[1];
|
||||
|
@ -194,7 +194,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
|
|||
record.src3.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
|
||||
record.dest_in.x = value[0];
|
||||
record.dest_in.y = value[1];
|
||||
|
@ -202,7 +202,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
|
|||
record.dest_in.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
|
||||
record.dest_out.x = value[0];
|
||||
record.dest_out.y = value[1];
|
||||
|
@ -210,67 +210,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24
|
|||
record.dest_out.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
|
||||
record.address_registers[0] = value[0];
|
||||
record.address_registers[1] = value[1];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
|
||||
record.conditional_code[0] = value[0];
|
||||
record.conditional_code[1] = value[1];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
|
||||
record.cond_bool = value;
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
|
||||
record.cond_cmp[0] = value[0];
|
||||
record.cond_cmp[1] = value[1];
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
|
||||
record.loop_int = value;
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
|
||||
record.instruction_offset = value;
|
||||
}
|
||||
|
||||
template<>
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
|
||||
record.next_instruction = value;
|
||||
}
|
||||
|
||||
// Helper function to set debug information on the current shader iteration.
|
||||
template<DebugDataRecord::Type type, typename ValueType>
|
||||
template <DebugDataRecord::Type type, typename ValueType>
|
||||
inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
|
||||
// Debugging disabled => nothing to do
|
||||
}
|
||||
|
||||
template<DebugDataRecord::Type type, typename ValueType>
|
||||
template <DebugDataRecord::Type type, typename ValueType>
|
||||
inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
|
||||
if (offset >= debug_data.records.size())
|
||||
debug_data.records.resize(offset + 1);
|
||||
|
||||
SetField<type, ValueType>(debug_data.records[offset], value);
|
||||
debug_data.records[offset].mask |= type;
|
||||
SetField<type, ValueType>(debug_data.records[offset], value);
|
||||
debug_data.records[offset].mask |= type;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
|
||||
* has four shader units that process shaders in parallel. At present, Citra only implements a
|
||||
* single shader unit that processes all shaders serially. Putting the state information in a struct
|
||||
* here will make it easier for us to parallelize the shader processing later.
|
||||
*/
|
||||
template<bool Debug>
|
||||
template <bool Debug>
|
||||
struct UnitState {
|
||||
struct Registers {
|
||||
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
||||
|
@ -293,10 +292,12 @@ struct UnitState {
|
|||
static size_t InputOffset(const SourceRegister& reg) {
|
||||
switch (reg.GetRegisterType()) {
|
||||
case RegisterType::Input:
|
||||
return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||
return offsetof(UnitState, registers.input) +
|
||||
reg.GetIndex() * sizeof(Math::Vec4<float24>);
|
||||
|
||||
case RegisterType::Temporary:
|
||||
return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||
return offsetof(UnitState, registers.temporary) +
|
||||
reg.GetIndex() * sizeof(Math::Vec4<float24>);
|
||||
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -307,10 +308,12 @@ struct UnitState {
|
|||
static size_t OutputOffset(const DestRegister& reg) {
|
||||
switch (reg.GetRegisterType()) {
|
||||
case RegisterType::Output:
|
||||
return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||
return offsetof(UnitState, output_registers.value) +
|
||||
reg.GetIndex() * sizeof(Math::Vec4<float24>);
|
||||
|
||||
case RegisterType::Temporary:
|
||||
return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||
return offsetof(UnitState, registers.temporary) +
|
||||
reg.GetIndex() * sizeof(Math::Vec4<float24>);
|
||||
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -336,13 +339,13 @@ struct ShaderSetup {
|
|||
static size_t UniformOffset(RegisterType type, unsigned index) {
|
||||
switch (type) {
|
||||
case RegisterType::FloatUniform:
|
||||
return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
|
||||
return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
|
||||
|
||||
case RegisterType::BoolUniform:
|
||||
return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
|
||||
return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
|
||||
|
||||
case RegisterType::IntUniform:
|
||||
return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
|
||||
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
|
||||
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -354,7 +357,8 @@ struct ShaderSetup {
|
|||
std::array<u32, 1024> swizzle_data;
|
||||
|
||||
/**
|
||||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
|
||||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once
|
||||
* per vertex, which would happen within the `Run` function).
|
||||
*/
|
||||
void Setup();
|
||||
|
@ -375,8 +379,8 @@ struct ShaderSetup {
|
|||
* @param setup Setup object for the shader pipeline
|
||||
* @return Debug information for this shader with regards to the given vertex
|
||||
*/
|
||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
|
||||
|
||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
|
||||
const Regs::ShaderConfig& config, const ShaderSetup& setup);
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -40,7 +40,7 @@ struct CallStackElement {
|
|||
u32 loop_address; // The address where we'll return to after each loop iteration
|
||||
};
|
||||
|
||||
template<bool Debug>
|
||||
template <bool Debug>
|
||||
void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
|
||||
// TODO: Is there a maximal size for this?
|
||||
boost::container::static_vector<CallStackElement, 16> call_stack;
|
||||
|
@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
}
|
||||
}
|
||||
|
||||
const Instruction instr = { program_code[program_counter] };
|
||||
const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
|
||||
const Instruction instr = {program_code[program_counter]};
|
||||
const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]};
|
||||
|
||||
auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
|
||||
u32 return_offset, u8 repeat_count, u8 loop_increment) {
|
||||
program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
|
||||
auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset,
|
||||
u32 num_instructions, u32 return_offset,
|
||||
u8 repeat_count, u8 loop_increment) {
|
||||
program_counter =
|
||||
offset -
|
||||
1; // -1 to make sure when incrementing the PC we end up at the correct offset
|
||||
ASSERT(call_stack.size() < call_stack.capacity());
|
||||
call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
|
||||
call_stack.push_back(
|
||||
{offset + num_instructions, return_offset, repeat_count, loop_increment, offset});
|
||||
};
|
||||
Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
|
||||
if (iteration > 0)
|
||||
|
@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
};
|
||||
|
||||
switch (instr.opcode.Value().GetInfo().type) {
|
||||
case OpCode::Type::Arithmetic:
|
||||
{
|
||||
const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||
case OpCode::Type::Arithmetic: {
|
||||
const bool is_inverted =
|
||||
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||
|
||||
const int address_offset = (instr.common.address_register_index == 0)
|
||||
? 0 : state.address_registers[instr.common.address_register_index - 1];
|
||||
const int address_offset =
|
||||
(instr.common.address_register_index == 0)
|
||||
? 0
|
||||
: state.address_registers[instr.common.address_register_index - 1];
|
||||
|
||||
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset));
|
||||
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset));
|
||||
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
|
||||
(!is_inverted * address_offset));
|
||||
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
|
||||
(is_inverted * address_offset));
|
||||
|
||||
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
||||
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
||||
|
||||
float24 src1[4] = {
|
||||
src1_[(int)swizzle.GetSelectorSrc1(0)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(1)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(2)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(3)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
|
||||
};
|
||||
if (negate_src1) {
|
||||
src1[0] = src1[0] * float24::FromFloat32(-1);
|
||||
|
@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
src1[3] = src1[3] * float24::FromFloat32(-1);
|
||||
}
|
||||
float24 src2[4] = {
|
||||
src2_[(int)swizzle.GetSelectorSrc2(0)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(1)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(2)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(3)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
|
||||
};
|
||||
if (negate_src2) {
|
||||
src2[0] = src2[0] * float24::FromFloat32(-1);
|
||||
|
@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
src2[3] = src2[3] * float24::FromFloat32(-1);
|
||||
}
|
||||
|
||||
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
|
||||
: (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
float24* dest =
|
||||
(instr.common.dest.Value() < 0x10)
|
||||
? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
|
||||
: (instr.common.dest.Value() < 0x20)
|
||||
? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
|
||||
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||
state.debug.max_opdesc_id =
|
||||
std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id);
|
||||
|
||||
switch (instr.opcode.Value().EffectiveOpCode()) {
|
||||
case OpCode::Id::ADD:
|
||||
{
|
||||
case OpCode::Id::ADD: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
|
@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MUL:
|
||||
{
|
||||
case OpCode::Id::MUL: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
|
@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
case OpCode::Id::DP3:
|
||||
case OpCode::Id::DP4:
|
||||
case OpCode::Id::DPH:
|
||||
case OpCode::Id::DPHI:
|
||||
{
|
||||
case OpCode::Id::DPHI: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
|
@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
src1[3] = float24::FromFloat32(1.0f);
|
||||
|
||||
int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
|
||||
float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f));
|
||||
float24 dot = std::inner_product(src1, src1 + num_components, src2,
|
||||
float24::FromFloat32(0.f));
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
|
@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
}
|
||||
|
||||
// Reciprocal
|
||||
case OpCode::Id::RCP:
|
||||
{
|
||||
case OpCode::Id::RCP: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
|
||||
|
@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
}
|
||||
|
||||
// Reciprocal Square Root
|
||||
case OpCode::Id::RSQ:
|
||||
{
|
||||
case OpCode::Id::RSQ: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
|
||||
|
@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MOVA:
|
||||
{
|
||||
case OpCode::Id::MOVA: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
|
@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
// TODO: Figure out how the rounding is done on hardware
|
||||
state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
|
||||
}
|
||||
Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers);
|
||||
Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration,
|
||||
state.address_registers);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MOV:
|
||||
{
|
||||
case OpCode::Id::MOV: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
|
@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
|
||||
dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
|
||||
: float24::FromFloat32(0.0f);
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
|
@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
|
||||
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
|
||||
: float24::FromFloat32(0.0f);
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
break;
|
||||
|
@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
|
||||
|
||||
switch (op) {
|
||||
case Instruction::Common::CompareOpType::Equal:
|
||||
state.conditional_code[i] = (src1[i] == src2[i]);
|
||||
break;
|
||||
case Instruction::Common::CompareOpType::Equal:
|
||||
state.conditional_code[i] = (src1[i] == src2[i]);
|
||||
break;
|
||||
|
||||
case Instruction::Common::CompareOpType::NotEqual:
|
||||
state.conditional_code[i] = (src1[i] != src2[i]);
|
||||
break;
|
||||
case Instruction::Common::CompareOpType::NotEqual:
|
||||
state.conditional_code[i] = (src1[i] != src2[i]);
|
||||
break;
|
||||
|
||||
case Instruction::Common::CompareOpType::LessThan:
|
||||
state.conditional_code[i] = (src1[i] < src2[i]);
|
||||
break;
|
||||
case Instruction::Common::CompareOpType::LessThan:
|
||||
state.conditional_code[i] = (src1[i] < src2[i]);
|
||||
break;
|
||||
|
||||
case Instruction::Common::CompareOpType::LessEqual:
|
||||
state.conditional_code[i] = (src1[i] <= src2[i]);
|
||||
break;
|
||||
case Instruction::Common::CompareOpType::LessEqual:
|
||||
state.conditional_code[i] = (src1[i] <= src2[i]);
|
||||
break;
|
||||
|
||||
case Instruction::Common::CompareOpType::GreaterThan:
|
||||
state.conditional_code[i] = (src1[i] > src2[i]);
|
||||
break;
|
||||
case Instruction::Common::CompareOpType::GreaterThan:
|
||||
state.conditional_code[i] = (src1[i] > src2[i]);
|
||||
break;
|
||||
|
||||
case Instruction::Common::CompareOpType::GreaterEqual:
|
||||
state.conditional_code[i] = (src1[i] >= src2[i]);
|
||||
break;
|
||||
case Instruction::Common::CompareOpType::GreaterEqual:
|
||||
state.conditional_code[i] = (src1[i] >= src2[i]);
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
|
||||
break;
|
||||
}
|
||||
}
|
||||
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
|
||||
break;
|
||||
|
||||
case OpCode::Id::EX2:
|
||||
{
|
||||
case OpCode::Id::EX2: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
|
||||
|
@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::LG2:
|
||||
{
|
||||
case OpCode::Id::LG2: {
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
|
||||
|
||||
|
@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
(int)instr.opcode.Value().EffectiveOpCode(),
|
||||
instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
DEBUG_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
|
@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
break;
|
||||
}
|
||||
|
||||
case OpCode::Type::MultiplyAdd:
|
||||
{
|
||||
case OpCode::Type::MultiplyAdd: {
|
||||
if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
|
||||
(instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
|
||||
const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]);
|
||||
const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(
|
||||
&swizzle_data[instr.mad.operand_desc_id]);
|
||||
|
||||
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
|
||||
|
||||
const int address_offset = (instr.mad.address_register_index == 0)
|
||||
? 0 : state.address_registers[instr.mad.address_register_index - 1];
|
||||
const int address_offset =
|
||||
(instr.mad.address_register_index == 0)
|
||||
? 0
|
||||
: state.address_registers[instr.mad.address_register_index - 1];
|
||||
|
||||
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
|
||||
const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset));
|
||||
const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset));
|
||||
const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
|
||||
(!is_inverted * address_offset));
|
||||
const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
|
||||
(is_inverted * address_offset));
|
||||
|
||||
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
||||
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
||||
const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
|
||||
|
||||
float24 src1[4] = {
|
||||
src1_[(int)swizzle.GetSelectorSrc1(0)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(1)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(2)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(3)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
|
||||
src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
|
||||
};
|
||||
if (negate_src1) {
|
||||
src1[0] = src1[0] * float24::FromFloat32(-1);
|
||||
|
@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
src1[3] = src1[3] * float24::FromFloat32(-1);
|
||||
}
|
||||
float24 src2[4] = {
|
||||
src2_[(int)swizzle.GetSelectorSrc2(0)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(1)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(2)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(3)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
|
||||
src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
|
||||
};
|
||||
if (negate_src2) {
|
||||
src2[0] = src2[0] * float24::FromFloat32(-1);
|
||||
|
@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
src2[3] = src2[3] * float24::FromFloat32(-1);
|
||||
}
|
||||
float24 src3[4] = {
|
||||
src3_[(int)swizzle.GetSelectorSrc3(0)],
|
||||
src3_[(int)swizzle.GetSelectorSrc3(1)],
|
||||
src3_[(int)swizzle.GetSelectorSrc3(2)],
|
||||
src3_[(int)swizzle.GetSelectorSrc3(3)],
|
||||
src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)],
|
||||
src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)],
|
||||
};
|
||||
if (negate_src3) {
|
||||
src3[0] = src3[0] * float24::FromFloat32(-1);
|
||||
|
@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
src3[3] = src3[3] * float24::FromFloat32(-1);
|
||||
}
|
||||
|
||||
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
|
||||
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
float24* dest =
|
||||
(instr.mad.dest.Value() < 0x10)
|
||||
? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
|
||||
: (instr.mad.dest.Value() < 0x20)
|
||||
? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
|
||||
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
|
||||
|
@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
(int)instr.opcode.Value().EffectiveOpCode(),
|
||||
instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
|
||||
bool results[2] = { refx == state.conditional_code[0],
|
||||
refy == state.conditional_code[1] };
|
||||
default: {
|
||||
static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy,
|
||||
Instruction::FlowControlType flow_control) {
|
||||
bool results[2] = {refx == state.conditional_code[0],
|
||||
refy == state.conditional_code[1]};
|
||||
|
||||
switch (flow_control.op) {
|
||||
case flow_control.Or:
|
||||
|
@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
break;
|
||||
|
||||
case OpCode::Id::JMPC:
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
|
||||
state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
|
||||
instr.flow_control)) {
|
||||
program_counter = instr.flow_control.dest_offset - 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case OpCode::Id::JMPU:
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(
|
||||
state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id] ==
|
||||
!(instr.flow_control.num_instructions & 1)) {
|
||||
program_counter = instr.flow_control.dest_offset - 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case OpCode::Id::CALL:
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
instr.flow_control.num_instructions,
|
||||
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
|
||||
program_counter + 1, 0, 0);
|
||||
break;
|
||||
|
||||
case OpCode::Id::CALLU:
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(
|
||||
state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
instr.flow_control.num_instructions,
|
||||
program_counter + 1, 0, 0);
|
||||
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
|
||||
program_counter + 1, 0, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OpCode::Id::CALLC:
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
instr.flow_control.num_instructions,
|
||||
program_counter + 1, 0, 0);
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
|
||||
state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
|
||||
instr.flow_control)) {
|
||||
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
|
||||
program_counter + 1, 0, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
break;
|
||||
|
||||
case OpCode::Id::IFU:
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
Record<DebugDataRecord::COND_BOOL_IN>(
|
||||
state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
||||
call(state,
|
||||
program_counter + 1,
|
||||
call(state, program_counter + 1,
|
||||
instr.flow_control.dest_offset - program_counter - 1,
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
|
||||
0);
|
||||
} else {
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
instr.flow_control.num_instructions,
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
||||
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
|
||||
0);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case OpCode::Id::IFC:
|
||||
{
|
||||
case OpCode::Id::IFC: {
|
||||
// TODO: Do we need to consider swizzlers here?
|
||||
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||
call(state,
|
||||
program_counter + 1,
|
||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
|
||||
state.conditional_code);
|
||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
|
||||
instr.flow_control)) {
|
||||
call(state, program_counter + 1,
|
||||
instr.flow_control.dest_offset - program_counter - 1,
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
|
||||
0);
|
||||
} else {
|
||||
call(state,
|
||||
instr.flow_control.dest_offset,
|
||||
instr.flow_control.num_instructions,
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
||||
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
|
||||
0);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::LOOP:
|
||||
{
|
||||
case OpCode::Id::LOOP: {
|
||||
Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].y,
|
||||
uniforms.i[instr.flow_control.int_uniform_id].z,
|
||||
|
@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
|||
state.address_registers[2] = loop_param.y;
|
||||
|
||||
Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
|
||||
call(state,
|
||||
program_counter + 1,
|
||||
call(state, program_counter + 1,
|
||||
instr.flow_control.dest_offset - program_counter + 1,
|
||||
instr.flow_control.dest_offset + 1,
|
||||
loop_param.x,
|
||||
loop_param.z);
|
||||
instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
(int)instr.opcode.Value().EffectiveOpCode(),
|
||||
instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,9 +8,10 @@ namespace Pica {
|
|||
|
||||
namespace Shader {
|
||||
|
||||
template <bool Debug> struct UnitState;
|
||||
template <bool Debug>
|
||||
struct UnitState;
|
||||
|
||||
template<bool Debug>
|
||||
template <bool Debug>
|
||||
void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -31,70 +31,70 @@ using namespace Gen;
|
|||
typedef void (JitShader::*JitFunction)(Instruction instr);
|
||||
|
||||
const JitFunction instr_table[64] = {
|
||||
&JitShader::Compile_ADD, // add
|
||||
&JitShader::Compile_DP3, // dp3
|
||||
&JitShader::Compile_DP4, // dp4
|
||||
&JitShader::Compile_DPH, // dph
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_EX2, // ex2
|
||||
&JitShader::Compile_LG2, // lg2
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_MUL, // mul
|
||||
&JitShader::Compile_SGE, // sge
|
||||
&JitShader::Compile_SLT, // slt
|
||||
&JitShader::Compile_FLR, // flr
|
||||
&JitShader::Compile_MAX, // max
|
||||
&JitShader::Compile_MIN, // min
|
||||
&JitShader::Compile_RCP, // rcp
|
||||
&JitShader::Compile_RSQ, // rsq
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_MOVA, // mova
|
||||
&JitShader::Compile_MOV, // mov
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_DPH, // dphi
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_SGE, // sgei
|
||||
&JitShader::Compile_SLT, // slti
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_NOP, // nop
|
||||
&JitShader::Compile_END, // end
|
||||
nullptr, // break
|
||||
&JitShader::Compile_CALL, // call
|
||||
&JitShader::Compile_CALLC, // callc
|
||||
&JitShader::Compile_CALLU, // callu
|
||||
&JitShader::Compile_IF, // ifu
|
||||
&JitShader::Compile_IF, // ifc
|
||||
&JitShader::Compile_LOOP, // loop
|
||||
nullptr, // emit
|
||||
nullptr, // sete
|
||||
&JitShader::Compile_JMP, // jmpc
|
||||
&JitShader::Compile_JMP, // jmpu
|
||||
&JitShader::Compile_CMP, // cmp
|
||||
&JitShader::Compile_CMP, // cmp
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_ADD, // add
|
||||
&JitShader::Compile_DP3, // dp3
|
||||
&JitShader::Compile_DP4, // dp4
|
||||
&JitShader::Compile_DPH, // dph
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_EX2, // ex2
|
||||
&JitShader::Compile_LG2, // lg2
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_MUL, // mul
|
||||
&JitShader::Compile_SGE, // sge
|
||||
&JitShader::Compile_SLT, // slt
|
||||
&JitShader::Compile_FLR, // flr
|
||||
&JitShader::Compile_MAX, // max
|
||||
&JitShader::Compile_MIN, // min
|
||||
&JitShader::Compile_RCP, // rcp
|
||||
&JitShader::Compile_RSQ, // rsq
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_MOVA, // mova
|
||||
&JitShader::Compile_MOV, // mov
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_DPH, // dphi
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_SGE, // sgei
|
||||
&JitShader::Compile_SLT, // slti
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
nullptr, // unknown
|
||||
&JitShader::Compile_NOP, // nop
|
||||
&JitShader::Compile_END, // end
|
||||
nullptr, // break
|
||||
&JitShader::Compile_CALL, // call
|
||||
&JitShader::Compile_CALLC, // callc
|
||||
&JitShader::Compile_CALLU, // callu
|
||||
&JitShader::Compile_IF, // ifu
|
||||
&JitShader::Compile_IF, // ifc
|
||||
&JitShader::Compile_LOOP, // loop
|
||||
nullptr, // emit
|
||||
nullptr, // sete
|
||||
&JitShader::Compile_JMP, // jmpc
|
||||
&JitShader::Compile_JMP, // jmpu
|
||||
&JitShader::Compile_CMP, // cmp
|
||||
&JitShader::Compile_CMP, // cmp
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // madi
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
&JitShader::Compile_MAD, // mad
|
||||
};
|
||||
|
||||
// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
|
||||
|
@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15;
|
|||
// State registers that must not be modified by external functions calls
|
||||
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
||||
static const BitSet32 persistent_regs = {
|
||||
SETUP, STATE, // Pointers to register blocks
|
||||
SETUP, STATE, // Pointers to register blocks
|
||||
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
|
||||
ONE+16, NEGBIT+16, // Constants
|
||||
ONE + 16, NEGBIT + 16, // Constants
|
||||
};
|
||||
|
||||
/// Raw constant for the source register selector that indicates no swizzling is performed
|
||||
|
@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
|
|||
* @return Instruction at the specified offset
|
||||
*/
|
||||
static Instruction GetVertexShaderInstruction(size_t offset) {
|
||||
return { g_state.vs.program_code[offset] };
|
||||
return {g_state.vs.program_code[offset]};
|
||||
}
|
||||
|
||||
static void LogCritical(const char* msg) {
|
||||
|
@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
|
|||
* @param src_reg SourceRegister object corresponding to the source register to load
|
||||
* @param dest Destination XMM register to store the loaded, swizzled source register
|
||||
*/
|
||||
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
|
||||
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
|
||||
X64Reg dest) {
|
||||
X64Reg src_ptr;
|
||||
size_t src_offset;
|
||||
|
||||
|
@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
|||
|
||||
unsigned operand_desc_id;
|
||||
|
||||
const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||
const bool is_inverted =
|
||||
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||
|
||||
unsigned address_register_index;
|
||||
unsigned offset_src;
|
||||
|
@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
|||
MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
|
||||
}
|
||||
|
||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
|
||||
SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
|
||||
|
||||
// Generate instructions for source register swizzling as needed
|
||||
u8 sel = swiz.GetRawSelector(src_num);
|
||||
|
@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
|||
}
|
||||
|
||||
// If the source register should be negated, flip the negative bit using XOR
|
||||
const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 };
|
||||
const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
|
||||
if (negate[src_num - 1]) {
|
||||
XORPS(dest, R(NEGBIT));
|
||||
}
|
||||
}
|
||||
|
||||
void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||
void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) {
|
||||
DestRegister dest;
|
||||
unsigned operand_desc_id;
|
||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
||||
|
@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||
dest = instr.common.dest.Value();
|
||||
}
|
||||
|
||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
|
||||
SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
|
||||
|
||||
int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
|
||||
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type");
|
||||
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest),
|
||||
"Destinaton offset too large for int type");
|
||||
|
||||
// If all components are enabled, write the result to the destination register
|
||||
if (swiz.dest_mask == NO_DEST_REG_MASK) {
|
||||
|
@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||
MOVAPS(MDisp(STATE, dest_offset_disp), src);
|
||||
|
||||
} else {
|
||||
// Not all components are enabled, so mask the result when storing to the destination register...
|
||||
// Not all components are enabled, so mask the result when storing to the destination
|
||||
// register...
|
||||
MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
|
||||
|
||||
if (Common::GetCPUCaps().sse4_1) {
|
||||
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
||||
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
|
||||
((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
||||
BLENDPS(SCRATCH, R(src), mask);
|
||||
} else {
|
||||
MOVAPS(SCRATCH2, R(src));
|
||||
UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack Z/W components of source and destination
|
||||
UNPCKLPS(SCRATCH, R(src)); // Unpack X/Y components of source and destination
|
||||
UNPCKLPS(SCRATCH, R(src)); // Unpack X/Y components of source and destination
|
||||
|
||||
// Compute selector to selectively copy source components to destination for SHUFPS instruction
|
||||
// Compute selector to selectively copy source components to destination for SHUFPS
|
||||
// instruction
|
||||
u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
|
||||
((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
|
||||
((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
|
||||
|
@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
|
|||
}
|
||||
|
||||
void JitShader::Compile_UniformCondition(Instruction instr) {
|
||||
int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
|
||||
int offset =
|
||||
ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
|
||||
CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
|
||||
}
|
||||
|
||||
|
@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) {
|
|||
}
|
||||
|
||||
void JitShader::Compile_MOVA(Instruction instr) {
|
||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
|
||||
SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
|
||||
|
||||
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
|
||||
return; // NoOp
|
||||
|
@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) {
|
|||
|
||||
// Call the subroutine
|
||||
FixupBranch b = CALL();
|
||||
fixup_branches.push_back({ b, instr.flow_control.dest_offset });
|
||||
fixup_branches.push_back({b, instr.flow_control.dest_offset});
|
||||
|
||||
// Skip over the return offset that's on the stack
|
||||
ADD(64, R(RSP), Imm32(8));
|
||||
|
@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) {
|
|||
// SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
|
||||
// emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
|
||||
// because they don't match when used with NaNs.
|
||||
static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE };
|
||||
static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
|
||||
|
||||
bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
|
||||
Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
|
||||
|
@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) {
|
|||
}
|
||||
|
||||
void JitShader::Compile_IF(Instruction instr) {
|
||||
Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
|
||||
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
|
||||
"Backwards if-statements not supported");
|
||||
|
||||
// Evaluate the "IF" condition
|
||||
if (instr.opcode.Value() == OpCode::Id::IFU) {
|
||||
|
@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) {
|
|||
}
|
||||
|
||||
void JitShader::Compile_LOOP(Instruction instr) {
|
||||
Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
|
||||
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
|
||||
"Backwards loops not supported");
|
||||
Compile_Assert(!looping, "Nested loops not supported");
|
||||
|
||||
looping = true;
|
||||
|
||||
int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
|
||||
int offset =
|
||||
ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
|
||||
MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
|
||||
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
|
||||
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
|
||||
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
|
||||
MOV(32, R(LOOPINC), R(LOOPCOUNT));
|
||||
SHR(32, R(LOOPINC), Imm8(16));
|
||||
MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
|
||||
MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
|
||||
MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
|
||||
ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
|
||||
ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
|
||||
|
||||
auto loop_start = GetCodePtr();
|
||||
|
||||
Compile_Block(instr.flow_control.dest_offset + 1);
|
||||
|
||||
ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
|
||||
SUB(32, R(LOOPCOUNT), Imm8(1)); // Decrement remaining iteration count by 1
|
||||
J_CC(CC_NZ, loop_start); // Loop if not equal
|
||||
SUB(32, R(LOOPCOUNT), Imm8(1)); // Decrement remaining iteration count by 1
|
||||
J_CC(CC_NZ, loop_start); // Loop if not equal
|
||||
|
||||
looping = false;
|
||||
}
|
||||
|
@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) {
|
|||
else
|
||||
UNREACHABLE();
|
||||
|
||||
bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
|
||||
(instr.flow_control.num_instructions & 1);
|
||||
bool inverted_condition =
|
||||
(instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
|
||||
|
||||
FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
|
||||
fixup_branches.push_back({ b, instr.flow_control.dest_offset });
|
||||
fixup_branches.push_back({b, instr.flow_control.dest_offset});
|
||||
}
|
||||
|
||||
void JitShader::Compile_Block(unsigned end) {
|
||||
|
@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() {
|
|||
Compile_Return();
|
||||
}
|
||||
|
||||
ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
|
||||
ASSERT_MSG(code_ptr[program_counter] == nullptr,
|
||||
"Tried to compile already compiled shader location!");
|
||||
code_ptr[program_counter] = GetCodePtr();
|
||||
|
||||
Instruction instr = GetVertexShaderInstruction(program_counter++);
|
||||
|
@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() {
|
|||
} else {
|
||||
// Unhandled instruction
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
|
||||
instr.opcode.Value().EffectiveOpCode(), instr.hex);
|
||||
instr.opcode.Value().EffectiveOpCode(), instr.hex);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() {
|
|||
case OpCode::Id::CALL:
|
||||
case OpCode::Id::CALLC:
|
||||
case OpCode::Id::CALLU:
|
||||
return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
|
||||
return_offsets.push_back(instr.flow_control.dest_offset +
|
||||
instr.flow_control.num_instructions);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -835,12 +847,12 @@ void JitShader::Compile() {
|
|||
XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
|
||||
|
||||
// Used to set a register to one
|
||||
static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
|
||||
static const __m128 one = {1.f, 1.f, 1.f, 1.f};
|
||||
MOV(PTRBITS, R(RAX), ImmPtr(&one));
|
||||
MOVAPS(ONE, MatR(RAX));
|
||||
|
||||
// Used to negate registers
|
||||
static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
|
||||
static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
|
||||
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
|
||||
MOVAPS(NEGBIT, MatR(RAX));
|
||||
|
||||
|
@ -850,7 +862,8 @@ void JitShader::Compile() {
|
|||
// Compile entire program
|
||||
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
|
||||
|
||||
// Set the target for any incomplete branches now that the entire shader program has been emitted
|
||||
// Set the target for any incomplete branches now that the entire shader program has been
|
||||
// emitted
|
||||
for (const auto& branch : fixup_branches) {
|
||||
SetJumpTarget(branch.first, code_ptr[branch.second]);
|
||||
}
|
||||
|
@ -861,7 +874,8 @@ void JitShader::Compile() {
|
|||
fixup_branches.clear();
|
||||
fixup_branches.shrink_to_fit();
|
||||
|
||||
uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
|
||||
uintptr_t size =
|
||||
reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
|
||||
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
||||
|
||||
LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
|
||||
|
|
|
@ -70,11 +70,11 @@ public:
|
|||
void Compile_MAD(Instruction instr);
|
||||
|
||||
private:
|
||||
|
||||
void Compile_Block(unsigned end);
|
||||
void Compile_NextInstr();
|
||||
|
||||
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
|
||||
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
|
||||
Gen::X64Reg dest);
|
||||
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
|
||||
|
||||
/**
|
||||
|
@ -111,8 +111,8 @@ private:
|
|||
/// Offsets in code where a return needs to be inserted
|
||||
std::vector<unsigned> return_offsets;
|
||||
|
||||
unsigned program_counter = 0; ///< Offset of the next instruction to decode
|
||||
bool looping = false; ///< True if compiling a loop, used to check for nested loops
|
||||
unsigned program_counter = 0; ///< Offset of the next instruction to decode
|
||||
bool looping = false; ///< True if compiling a loop, used to check for nested loops
|
||||
|
||||
/// Branches that need to be fixed up once the entire shader program is compiled
|
||||
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue