shader_ir: Implement VOTE
Implement VOTE using Nvidia's intrinsics. Documentation about these can be found here https://developer.nvidia.com/reading-between-threads-shader-intrinsics Instead of using portable ARB instructions I opted to use Nvidia intrinsics because these are the closest we have to how Tegra X1 hardware renders. To stub VOTE on non-Nvidia drivers (including nouveau) this commit simulates a GPU with a warp size of one, returning what is meaningful for the instruction being emulated: * anyThreadNV(value) -> value * allThreadsNV(value) -> value * allThreadsEqualNV(value) -> true ballotARB, also known as "uint64_t(activeThreadsNV())", emits VOTE.ANY Rd, PT, PT; on nouveau's compiler. This doesn't match exactly to Nvidia's code VOTE.ALL Rd, PT, PT; Which is emulated with activeThreadsNV() by this commit. In theory this shouldn't really matter since .ANY, .ALL and .EQ affect the predicates (set to PT on those cases) and not the registers.
This commit is contained in:
parent
b4a8cfbd00
commit
4e35177e23
13 changed files with 164 additions and 1 deletions
|
@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
|||
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
|
||||
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
|
||||
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
|
||||
{OpCode::Type::Warp, &ShaderIR::DecodeWarp},
|
||||
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
|
||||
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
|
||||
{OpCode::Type::Image, &ShaderIR::DecodeImage},
|
||||
|
|
55
src/video_core/shader/decode/warp.cpp
Normal file
55
src/video_core/shader/decode/warp.cpp
Normal file
|
@ -0,0 +1,55 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::VoteOperation;
|
||||
|
||||
namespace {
|
||||
OperationCode GetOperationCode(VoteOperation vote_op) {
|
||||
switch (vote_op) {
|
||||
case VoteOperation::All:
|
||||
return OperationCode::VoteAll;
|
||||
case VoteOperation::Any:
|
||||
return OperationCode::VoteAny;
|
||||
case VoteOperation::Eq:
|
||||
return OperationCode::VoteEqual;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
|
||||
return OperationCode::VoteAll;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::VOTE: {
|
||||
const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
|
||||
const Node active = Operation(OperationCode::BallotThread, value);
|
||||
const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
|
||||
SetRegister(bb, instr.gpr0, active);
|
||||
SetPredicate(bb, instr.vote.dest_pred, vote);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
|
||||
break;
|
||||
}
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
|
@ -168,6 +168,11 @@ enum class OperationCode {
|
|||
WorkGroupIdY, /// () -> uint
|
||||
WorkGroupIdZ, /// () -> uint
|
||||
|
||||
BallotThread, /// (bool) -> uint
|
||||
VoteAll, /// (bool) -> bool
|
||||
VoteAny, /// (bool) -> bool
|
||||
VoteEqual, /// (bool) -> bool
|
||||
|
||||
Amount,
|
||||
};
|
||||
|
||||
|
|
|
@ -167,6 +167,7 @@ private:
|
|||
u32 DecodeFfma(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeConversion(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeWarp(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeMemory(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeTexture(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeImage(NodeBlock& bb, u32 pc);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue