Sources: Run clang-format on everything.

This commit is contained in:
Emmanuel Gil Peyrot 2016-09-18 09:38:01 +09:00
parent fe948af095
commit dc8479928c
386 changed files with 19560 additions and 18080 deletions

View file

@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
const auto& output_register_map = g_state.regs.vs_output_attributes[index];
u32 semantics[4] = {
output_register_map.map_x, output_register_map.map_y,
output_register_map.map_z, output_register_map.map_w
};
u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
output_register_map.map_z, output_register_map.map_w};
for (unsigned comp = 0; comp < 4; ++comp) {
float24* out = ((float24*)&ret) + semantics[comp];
@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
index++;
}
// The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
// The hardware takes the absolute and saturates vertex colors like this, *before* doing
// interpolation
for (unsigned i = 0; i < 4; ++i) {
ret.color[i] = float24::FromFloat32(
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
}
LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
return ret;
}
@ -96,8 +95,9 @@ void ClearCache() {
void ShaderSetup::Setup() {
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) {
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
u64 cache_key =
(Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
auto iter = shader_map.find(cache_key);
if (iter != shader_map.end()) {
@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
const auto& attribute_register_map = config.input_register_map;
for (unsigned i = 0; i < num_attributes; i++)
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
state.conditional_code[0] = false;
state.conditional_code[1] = false;
@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
#else
RunInterpreter(setup, state, config.main_offset);
#endif // ARCHITECTURE_x86_64
}
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
const Regs::ShaderConfig& config,
const ShaderSetup& setup) {
UnitState<true> state;
state.debug.max_offset = 0;
@ -155,7 +156,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
boost::fill(state.registers.input, &dummy_register);
for (unsigned i = 0; i < num_attributes; i++)
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
state.conditional_code[0] = false;
state.conditional_code[1] = false;

View file

@ -94,46 +94,46 @@ struct OutputRegisters {
static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
// Helper structure used to keep track of data useful for inspection of shader emulation
template<bool full_debugging>
template <bool full_debugging>
struct DebugData;
template<>
template <>
struct DebugData<false> {
// TODO: Hide these behind and interface and move them to DebugData<true>
u32 max_offset; // maximum program counter ever reached
u32 max_offset; // maximum program counter ever reached
u32 max_opdesc_id; // maximum swizzle pattern index ever used
};
template<>
template <>
struct DebugData<true> {
// Records store the input and output operands of a particular instruction.
struct Record {
enum Type {
// Floating point arithmetic operands
SRC1 = 0x1,
SRC2 = 0x2,
SRC3 = 0x4,
SRC1 = 0x1,
SRC2 = 0x2,
SRC3 = 0x4,
// Initial and final output operand value
DEST_IN = 0x8,
DEST_OUT = 0x10,
DEST_IN = 0x8,
DEST_OUT = 0x10,
// Current and next instruction offset (in words)
CUR_INSTR = 0x20,
NEXT_INSTR = 0x40,
CUR_INSTR = 0x20,
NEXT_INSTR = 0x40,
// Output address register value
ADDR_REG_OUT = 0x80,
// Result of a comparison instruction
CMP_RESULT = 0x100,
CMP_RESULT = 0x100,
// Input values for conditional flow control instructions
COND_BOOL_IN = 0x200,
COND_CMP_IN = 0x400,
COND_CMP_IN = 0x400,
// Input values for a loop
LOOP_INT_IN = 0x800,
LOOP_INT_IN = 0x800,
};
Math::Vec4<float24> src1;
@ -156,7 +156,7 @@ struct DebugData<true> {
unsigned mask = 0;
};
u32 max_offset; // maximum program counter ever reached
u32 max_offset; // maximum program counter ever reached
u32 max_opdesc_id; // maximum swizzle pattern index ever used
// List of records for each executed shader instruction
@ -167,10 +167,10 @@ struct DebugData<true> {
using DebugDataRecord = DebugData<true>::Record;
// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
template<DebugDataRecord::Type type, typename ValueType>
template <DebugDataRecord::Type type, typename ValueType>
inline void SetField(DebugDataRecord& record, ValueType value);
template<>
template <>
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
record.src1.x = value[0];
record.src1.y = value[1];
@ -178,7 +178,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
record.src1.w = value[3];
}
template<>
template <>
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
record.src2.x = value[0];
record.src2.y = value[1];
@ -186,7 +186,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
record.src2.w = value[3];
}
template<>
template <>
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
record.src3.x = value[0];
record.src3.y = value[1];
@ -194,7 +194,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
record.src3.w = value[3];
}
template<>
template <>
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
record.dest_in.x = value[0];
record.dest_in.y = value[1];
@ -202,7 +202,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
record.dest_in.w = value[3];
}
template<>
template <>
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
record.dest_out.x = value[0];
record.dest_out.y = value[1];
@ -210,67 +210,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24
record.dest_out.w = value[3];
}
template<>
template <>
inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
record.address_registers[0] = value[0];
record.address_registers[1] = value[1];
}
template<>
template <>
inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
record.conditional_code[0] = value[0];
record.conditional_code[1] = value[1];
}
template<>
template <>
inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
record.cond_bool = value;
}
template<>
template <>
inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
record.cond_cmp[0] = value[0];
record.cond_cmp[1] = value[1];
}
template<>
template <>
inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
record.loop_int = value;
}
template<>
template <>
inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
record.instruction_offset = value;
}
template<>
template <>
inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
record.next_instruction = value;
}
// Helper function to set debug information on the current shader iteration.
template<DebugDataRecord::Type type, typename ValueType>
template <DebugDataRecord::Type type, typename ValueType>
inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
// Debugging disabled => nothing to do
}
template<DebugDataRecord::Type type, typename ValueType>
template <DebugDataRecord::Type type, typename ValueType>
inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
if (offset >= debug_data.records.size())
debug_data.records.resize(offset + 1);
SetField<type, ValueType>(debug_data.records[offset], value);
debug_data.records[offset].mask |= type;
SetField<type, ValueType>(debug_data.records[offset], value);
debug_data.records[offset].mask |= type;
}
/**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At the present, Citra only implements a
* single shader unit that processes all shaders serially. Putting the state information in a struct
* here will make it easier for us to parallelize the shader processing later.
*/
template<bool Debug>
template <bool Debug>
struct UnitState {
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
@ -293,10 +292,12 @@ struct UnitState {
static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Input:
return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
return offsetof(UnitState, registers.input) +
reg.GetIndex() * sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
return offsetof(UnitState, registers.temporary) +
reg.GetIndex() * sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@ -307,10 +308,12 @@ struct UnitState {
static size_t OutputOffset(const DestRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Output:
return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
return offsetof(UnitState, output_registers.value) +
reg.GetIndex() * sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
return offsetof(UnitState, registers.temporary) +
reg.GetIndex() * sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@ -336,13 +339,13 @@ struct ShaderSetup {
static size_t UniformOffset(RegisterType type, unsigned index) {
switch (type) {
case RegisterType::FloatUniform:
return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
case RegisterType::BoolUniform:
return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
case RegisterType::IntUniform:
return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
default:
UNREACHABLE();
@ -354,7 +357,8 @@ struct ShaderSetup {
std::array<u32, 1024> swizzle_data;
/**
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once
* per
* vertex, which would happen within the `Run` function).
*/
void Setup();
@ -375,8 +379,8 @@ struct ShaderSetup {
* @param setup Setup object for the shader pipeline
* @return Debug information for this shader with regards to the given vertex
*/
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
const Regs::ShaderConfig& config, const ShaderSetup& setup);
};
} // namespace Shader

View file

@ -40,7 +40,7 @@ struct CallStackElement {
u32 loop_address; // The address where we'll return to after each loop iteration
};
template<bool Debug>
template <bool Debug>
void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
// TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack;
@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
}
}
const Instruction instr = { program_code[program_counter] };
const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
const Instruction instr = {program_code[program_counter]};
const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]};
auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
u32 return_offset, u8 repeat_count, u8 loop_increment) {
program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset,
u32 num_instructions, u32 return_offset,
u8 repeat_count, u8 loop_increment) {
program_counter =
offset -
1; // -1 to make sure when incrementing the PC we end up at the correct offset
ASSERT(call_stack.size() < call_stack.capacity());
call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
call_stack.push_back(
{offset + num_instructions, return_offset, repeat_count, loop_increment, offset});
};
Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
if (iteration > 0)
@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
};
switch (instr.opcode.Value().GetInfo().type) {
case OpCode::Type::Arithmetic:
{
const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
case OpCode::Type::Arithmetic: {
const bool is_inverted =
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
const int address_offset = (instr.common.address_register_index == 0)
? 0 : state.address_registers[instr.common.address_register_index - 1];
const int address_offset =
(instr.common.address_register_index == 0)
? 0
: state.address_registers[instr.common.address_register_index - 1];
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset));
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset));
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
(!is_inverted * address_offset));
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
(is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
float24 src1[4] = {
src1_[(int)swizzle.GetSelectorSrc1(0)],
src1_[(int)swizzle.GetSelectorSrc1(1)],
src1_[(int)swizzle.GetSelectorSrc1(2)],
src1_[(int)swizzle.GetSelectorSrc1(3)],
src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
};
if (negate_src1) {
src1[0] = src1[0] * float24::FromFloat32(-1);
@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src1[3] = src1[3] * float24::FromFloat32(-1);
}
float24 src2[4] = {
src2_[(int)swizzle.GetSelectorSrc2(0)],
src2_[(int)swizzle.GetSelectorSrc2(1)],
src2_[(int)swizzle.GetSelectorSrc2(2)],
src2_[(int)swizzle.GetSelectorSrc2(3)],
src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
};
if (negate_src2) {
src2[0] = src2[0] * float24::FromFloat32(-1);
@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src2[3] = src2[3] * float24::FromFloat32(-1);
}
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
: (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
float24* dest =
(instr.common.dest.Value() < 0x10)
? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
: (instr.common.dest.Value() < 0x20)
? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
state.debug.max_opdesc_id =
std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id);
switch (instr.opcode.Value().EffectiveOpCode()) {
case OpCode::Id::ADD:
{
case OpCode::Id::ADD: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
case OpCode::Id::MUL:
{
case OpCode::Id::MUL: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
case OpCode::Id::DP3:
case OpCode::Id::DP4:
case OpCode::Id::DPH:
case OpCode::Id::DPHI:
{
case OpCode::Id::DPHI: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src1[3] = float24::FromFloat32(1.0f);
int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f));
float24 dot = std::inner_product(src1, src1 + num_components, src2,
float24::FromFloat32(0.f));
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
}
// Reciprocal
case OpCode::Id::RCP:
{
case OpCode::Id::RCP: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
}
// Reciprocal Square Root
case OpCode::Id::RSQ:
{
case OpCode::Id::RSQ: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
case OpCode::Id::MOVA:
{
case OpCode::Id::MOVA: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
for (int i = 0; i < 2; ++i) {
if (!swizzle.DestComponentEnabled(i))
@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
// TODO: Figure out how the rounding is done on hardware
state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
}
Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers);
Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration,
state.address_registers);
break;
}
case OpCode::Id::MOV:
{
case OpCode::Id::MOV: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
for (int i = 0; i < 4; ++i) {
@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
if (!swizzle.DestComponentEnabled(i))
continue;
dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
: float24::FromFloat32(0.0f);
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
if (!swizzle.DestComponentEnabled(i))
continue;
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
: float24::FromFloat32(0.0f);
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
switch (op) {
case Instruction::Common::CompareOpType::Equal:
state.conditional_code[i] = (src1[i] == src2[i]);
break;
case Instruction::Common::CompareOpType::Equal:
state.conditional_code[i] = (src1[i] == src2[i]);
break;
case Instruction::Common::CompareOpType::NotEqual:
state.conditional_code[i] = (src1[i] != src2[i]);
break;
case Instruction::Common::CompareOpType::NotEqual:
state.conditional_code[i] = (src1[i] != src2[i]);
break;
case Instruction::Common::CompareOpType::LessThan:
state.conditional_code[i] = (src1[i] < src2[i]);
break;
case Instruction::Common::CompareOpType::LessThan:
state.conditional_code[i] = (src1[i] < src2[i]);
break;
case Instruction::Common::CompareOpType::LessEqual:
state.conditional_code[i] = (src1[i] <= src2[i]);
break;
case Instruction::Common::CompareOpType::LessEqual:
state.conditional_code[i] = (src1[i] <= src2[i]);
break;
case Instruction::Common::CompareOpType::GreaterThan:
state.conditional_code[i] = (src1[i] > src2[i]);
break;
case Instruction::Common::CompareOpType::GreaterThan:
state.conditional_code[i] = (src1[i] > src2[i]);
break;
case Instruction::Common::CompareOpType::GreaterEqual:
state.conditional_code[i] = (src1[i] >= src2[i]);
break;
case Instruction::Common::CompareOpType::GreaterEqual:
state.conditional_code[i] = (src1[i] >= src2[i]);
break;
default:
LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
break;
default:
LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
break;
}
}
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
break;
case OpCode::Id::EX2:
{
case OpCode::Id::EX2: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
case OpCode::Id::LG2:
{
case OpCode::Id::LG2: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
(int)instr.opcode.Value().EffectiveOpCode(),
instr.opcode.Value().GetInfo().name, instr.hex);
DEBUG_ASSERT(false);
break;
}
@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
case OpCode::Type::MultiplyAdd:
{
case OpCode::Type::MultiplyAdd: {
if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
(instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]);
const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(
&swizzle_data[instr.mad.operand_desc_id]);
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
const int address_offset = (instr.mad.address_register_index == 0)
? 0 : state.address_registers[instr.mad.address_register_index - 1];
const int address_offset =
(instr.mad.address_register_index == 0)
? 0
: state.address_registers[instr.mad.address_register_index - 1];
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset));
const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset));
const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
(!is_inverted * address_offset));
const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
(is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
float24 src1[4] = {
src1_[(int)swizzle.GetSelectorSrc1(0)],
src1_[(int)swizzle.GetSelectorSrc1(1)],
src1_[(int)swizzle.GetSelectorSrc1(2)],
src1_[(int)swizzle.GetSelectorSrc1(3)],
src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
};
if (negate_src1) {
src1[0] = src1[0] * float24::FromFloat32(-1);
@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src1[3] = src1[3] * float24::FromFloat32(-1);
}
float24 src2[4] = {
src2_[(int)swizzle.GetSelectorSrc2(0)],
src2_[(int)swizzle.GetSelectorSrc2(1)],
src2_[(int)swizzle.GetSelectorSrc2(2)],
src2_[(int)swizzle.GetSelectorSrc2(3)],
src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
};
if (negate_src2) {
src2[0] = src2[0] * float24::FromFloat32(-1);
@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src2[3] = src2[3] * float24::FromFloat32(-1);
}
float24 src3[4] = {
src3_[(int)swizzle.GetSelectorSrc3(0)],
src3_[(int)swizzle.GetSelectorSrc3(1)],
src3_[(int)swizzle.GetSelectorSrc3(2)],
src3_[(int)swizzle.GetSelectorSrc3(3)],
src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)],
src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)],
};
if (negate_src3) {
src3[0] = src3[0] * float24::FromFloat32(-1);
@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src3[3] = src3[3] * float24::FromFloat32(-1);
}
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
float24* dest =
(instr.mad.dest.Value() < 0x10)
? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
: (instr.mad.dest.Value() < 0x20)
? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
} else {
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
(int)instr.opcode.Value().EffectiveOpCode(),
instr.opcode.Value().GetInfo().name, instr.hex);
}
break;
}
default:
{
static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
bool results[2] = { refx == state.conditional_code[0],
refy == state.conditional_code[1] };
default: {
static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy,
Instruction::FlowControlType flow_control) {
bool results[2] = {refx == state.conditional_code[0],
refy == state.conditional_code[1]};
switch (flow_control.op) {
case flow_control.Or:
@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
case OpCode::Id::JMPC:
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
state.conditional_code);
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
instr.flow_control)) {
program_counter = instr.flow_control.dest_offset - 1;
}
break;
case OpCode::Id::JMPU:
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
Record<DebugDataRecord::COND_BOOL_IN>(
state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
if (uniforms.b[instr.flow_control.bool_uniform_id] ==
!(instr.flow_control.num_instructions & 1)) {
program_counter = instr.flow_control.dest_offset - 1;
}
break;
case OpCode::Id::CALL:
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
program_counter + 1, 0, 0);
break;
case OpCode::Id::CALLU:
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
Record<DebugDataRecord::COND_BOOL_IN>(
state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
program_counter + 1, 0, 0);
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
program_counter + 1, 0, 0);
}
break;
case OpCode::Id::CALLC:
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
program_counter + 1, 0, 0);
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
state.conditional_code);
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
instr.flow_control)) {
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
program_counter + 1, 0, 0);
}
break;
@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
case OpCode::Id::IFU:
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
Record<DebugDataRecord::COND_BOOL_IN>(
state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
program_counter + 1,
call(state, program_counter + 1,
instr.flow_control.dest_offset - program_counter - 1,
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
0);
} else {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
0);
}
break;
case OpCode::Id::IFC:
{
case OpCode::Id::IFC: {
// TODO: Do we need to consider swizzlers here?
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
call(state,
program_counter + 1,
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
state.conditional_code);
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
instr.flow_control)) {
call(state, program_counter + 1,
instr.flow_control.dest_offset - program_counter - 1,
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
0);
} else {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
0);
}
break;
}
case OpCode::Id::LOOP:
{
case OpCode::Id::LOOP: {
Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
uniforms.i[instr.flow_control.int_uniform_id].y,
uniforms.i[instr.flow_control.int_uniform_id].z,
@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
state.address_registers[2] = loop_param.y;
Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
call(state,
program_counter + 1,
call(state, program_counter + 1,
instr.flow_control.dest_offset - program_counter + 1,
instr.flow_control.dest_offset + 1,
loop_param.x,
loop_param.z);
instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z);
break;
}
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
(int)instr.opcode.Value().EffectiveOpCode(),
instr.opcode.Value().GetInfo().name, instr.hex);
break;
}

View file

@ -8,9 +8,10 @@ namespace Pica {
namespace Shader {
template <bool Debug> struct UnitState;
template <bool Debug>
struct UnitState;
template<bool Debug>
template <bool Debug>
void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
} // namespace

View file

@ -31,70 +31,70 @@ using namespace Gen;
typedef void (JitShader::*JitFunction)(Instruction instr);
const JitFunction instr_table[64] = {
&JitShader::Compile_ADD, // add
&JitShader::Compile_DP3, // dp3
&JitShader::Compile_DP4, // dp4
&JitShader::Compile_DPH, // dph
nullptr, // unknown
&JitShader::Compile_EX2, // ex2
&JitShader::Compile_LG2, // lg2
nullptr, // unknown
&JitShader::Compile_MUL, // mul
&JitShader::Compile_SGE, // sge
&JitShader::Compile_SLT, // slt
&JitShader::Compile_FLR, // flr
&JitShader::Compile_MAX, // max
&JitShader::Compile_MIN, // min
&JitShader::Compile_RCP, // rcp
&JitShader::Compile_RSQ, // rsq
nullptr, // unknown
nullptr, // unknown
&JitShader::Compile_MOVA, // mova
&JitShader::Compile_MOV, // mov
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
&JitShader::Compile_DPH, // dphi
nullptr, // unknown
&JitShader::Compile_SGE, // sgei
&JitShader::Compile_SLT, // slti
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
&JitShader::Compile_NOP, // nop
&JitShader::Compile_END, // end
nullptr, // break
&JitShader::Compile_CALL, // call
&JitShader::Compile_CALLC, // callc
&JitShader::Compile_CALLU, // callu
&JitShader::Compile_IF, // ifu
&JitShader::Compile_IF, // ifc
&JitShader::Compile_LOOP, // loop
nullptr, // emit
nullptr, // sete
&JitShader::Compile_JMP, // jmpc
&JitShader::Compile_JMP, // jmpu
&JitShader::Compile_CMP, // cmp
&JitShader::Compile_CMP, // cmp
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_ADD, // add
&JitShader::Compile_DP3, // dp3
&JitShader::Compile_DP4, // dp4
&JitShader::Compile_DPH, // dph
nullptr, // unknown
&JitShader::Compile_EX2, // ex2
&JitShader::Compile_LG2, // lg2
nullptr, // unknown
&JitShader::Compile_MUL, // mul
&JitShader::Compile_SGE, // sge
&JitShader::Compile_SLT, // slt
&JitShader::Compile_FLR, // flr
&JitShader::Compile_MAX, // max
&JitShader::Compile_MIN, // min
&JitShader::Compile_RCP, // rcp
&JitShader::Compile_RSQ, // rsq
nullptr, // unknown
nullptr, // unknown
&JitShader::Compile_MOVA, // mova
&JitShader::Compile_MOV, // mov
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
&JitShader::Compile_DPH, // dphi
nullptr, // unknown
&JitShader::Compile_SGE, // sgei
&JitShader::Compile_SLT, // slti
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
&JitShader::Compile_NOP, // nop
&JitShader::Compile_END, // end
nullptr, // break
&JitShader::Compile_CALL, // call
&JitShader::Compile_CALLC, // callc
&JitShader::Compile_CALLU, // callu
&JitShader::Compile_IF, // ifu
&JitShader::Compile_IF, // ifc
&JitShader::Compile_LOOP, // loop
nullptr, // emit
nullptr, // sete
&JitShader::Compile_JMP, // jmpc
&JitShader::Compile_JMP, // jmpu
&JitShader::Compile_CMP, // cmp
&JitShader::Compile_CMP, // cmp
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // madi
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
&JitShader::Compile_MAD, // mad
};
// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15;
// State registers that must not be modified by external functions calls
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = {
SETUP, STATE, // Pointers to register blocks
SETUP, STATE, // Pointers to register blocks
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
ONE+16, NEGBIT+16, // Constants
ONE + 16, NEGBIT + 16, // Constants
};
/// Raw constant for the source register selector that indicates no swizzling is performed
@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
* @return Instruction at the specified offset
*/
static Instruction GetVertexShaderInstruction(size_t offset) {
return { g_state.vs.program_code[offset] };
return {g_state.vs.program_code[offset]};
}
static void LogCritical(const char* msg) {
@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
* @param src_reg SourceRegister object corresponding to the source register to load
* @param dest Destination XMM register to store the loaded, swizzled source register
*/
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
X64Reg dest) {
X64Reg src_ptr;
size_t src_offset;
@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
unsigned operand_desc_id;
const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
const bool is_inverted =
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
unsigned address_register_index;
unsigned offset_src;
@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
}
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
// Generate instructions for source register swizzling as needed
u8 sel = swiz.GetRawSelector(src_num);
@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
}
// If the source register should be negated, flip the negative bit using XOR
const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 };
const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
if (negate[src_num - 1]) {
XORPS(dest, R(NEGBIT));
}
}
void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) {
DestRegister dest;
unsigned operand_desc_id;
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
dest = instr.common.dest.Value();
}
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type");
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest),
"Destinaton offset too large for int type");
// If all components are enabled, write the result to the destination register
if (swiz.dest_mask == NO_DEST_REG_MASK) {
@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
MOVAPS(MDisp(STATE, dest_offset_disp), src);
} else {
// Not all components are enabled, so mask the result when storing to the destination register...
// Not all components are enabled, so mask the result when storing to the destination
// register...
MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
if (Common::GetCPUCaps().sse4_1) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
BLENDPS(SCRATCH, R(src), mask);
} else {
MOVAPS(SCRATCH2, R(src));
UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
// Compute selector to selectively copy source components to destination for SHUFPS instruction
// Compute selector to selectively copy source components to destination for SHUFPS
// instruction
u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
}
void JitShader::Compile_UniformCondition(Instruction instr) {
int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
int offset =
ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
}
@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) {
}
void JitShader::Compile_MOVA(Instruction instr) {
SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
return; // NoOp
@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) {
// Call the subroutine
FixupBranch b = CALL();
fixup_branches.push_back({ b, instr.flow_control.dest_offset });
fixup_branches.push_back({b, instr.flow_control.dest_offset});
// Skip over the return offset that's on the stack
ADD(64, R(RSP), Imm32(8));
@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) {
// SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
// emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
// because they don't match when used with NaNs.
static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE };
static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) {
}
void JitShader::Compile_IF(Instruction instr) {
Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
"Backwards if-statements not supported");
// Evaluate the "IF" condition
if (instr.opcode.Value() == OpCode::Id::IFU) {
@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) {
}
void JitShader::Compile_LOOP(Instruction instr) {
Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
"Backwards loops not supported");
Compile_Assert(!looping, "Nested loops not supported");
looping = true;
int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
int offset =
ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
MOV(32, R(LOOPINC), R(LOOPCOUNT));
SHR(32, R(LOOPINC), Imm8(16));
MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
auto loop_start = GetCodePtr();
Compile_Block(instr.flow_control.dest_offset + 1);
ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
J_CC(CC_NZ, loop_start); // Loop if not equal
SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
J_CC(CC_NZ, loop_start); // Loop if not equal
looping = false;
}
@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) {
else
UNREACHABLE();
bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
(instr.flow_control.num_instructions & 1);
bool inverted_condition =
(instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
fixup_branches.push_back({ b, instr.flow_control.dest_offset });
fixup_branches.push_back({b, instr.flow_control.dest_offset});
}
void JitShader::Compile_Block(unsigned end) {
@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() {
Compile_Return();
}
ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
ASSERT_MSG(code_ptr[program_counter] == nullptr,
"Tried to compile already compiled shader location!");
code_ptr[program_counter] = GetCodePtr();
Instruction instr = GetVertexShaderInstruction(program_counter++);
@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() {
} else {
// Unhandled instruction
LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
instr.opcode.Value().EffectiveOpCode(), instr.hex);
instr.opcode.Value().EffectiveOpCode(), instr.hex);
}
}
@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() {
case OpCode::Id::CALL:
case OpCode::Id::CALLC:
case OpCode::Id::CALLU:
return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
return_offsets.push_back(instr.flow_control.dest_offset +
instr.flow_control.num_instructions);
break;
default:
break;
@ -835,12 +847,12 @@ void JitShader::Compile() {
XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
// Used to set a register to one
static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
static const __m128 one = {1.f, 1.f, 1.f, 1.f};
MOV(PTRBITS, R(RAX), ImmPtr(&one));
MOVAPS(ONE, MatR(RAX));
// Used to negate registers
static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
MOVAPS(NEGBIT, MatR(RAX));
@ -850,7 +862,8 @@ void JitShader::Compile() {
// Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
// Set the target for any incomplete branches now that the entire shader program has been emitted
// Set the target for any incomplete branches now that the entire shader program has been
// emitted
for (const auto& branch : fixup_branches) {
SetJumpTarget(branch.first, code_ptr[branch.second]);
}
@ -861,7 +874,8 @@ void JitShader::Compile() {
fixup_branches.clear();
fixup_branches.shrink_to_fit();
uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
uintptr_t size =
reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);

View file

@ -70,11 +70,11 @@ public:
void Compile_MAD(Instruction instr);
private:
void Compile_Block(unsigned end);
void Compile_NextInstr();
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
Gen::X64Reg dest);
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
/**
@ -111,8 +111,8 @@ private:
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;
unsigned program_counter = 0; ///< Offset of the next instruction to decode
bool looping = false; ///< True if compiling a loop, used to check for nested loops
unsigned program_counter = 0; ///< Offset of the next instruction to decode
bool looping = false; ///< True if compiling a loop, used to check for nested loops
/// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;