core: Many things (#194)

* video_core: Add a few missed things

* libkernel: More proper memory mapped files

* memory: Fix tessellation buffer mapping

* Cuphead work

* sceKernelPollSema fix

* clang format

* fixed ngs2 lle loading and rtc lib

* draft pthreads keys implementation

* fixed return codes

* return error code if sceKernelLoadStartModule module is invalid

* re-enabled system modules and disabled debug in libs.h

* Improve Linux support

* fix Windows build

* kernel: Rework keys

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
TheTurtle 2024-06-15 14:36:07 +03:00 committed by GitHub
parent 6a47f8ae50
commit c5d1d579b1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
67 changed files with 1406 additions and 307 deletions

View file

@ -160,7 +160,7 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
}
}
void Translator::S_AND_B64(const GcnInst& inst) {
void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
@ -175,7 +175,10 @@ void Translator::S_AND_B64(const GcnInst& inst) {
};
const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])};
const IR::U1 result = ir.LogicalAnd(src0, src1);
IR::U1 result = ir.LogicalAnd(src0, src1);
if (negate) {
result = ir.LogicalNot(result);
}
ir.SetScc(result);
switch (inst.dst[0].field) {
case OperandField::VccLo:

View file

@ -121,6 +121,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
case OperandField::ConstFloatNeg_2_0:
value = ir.Imm32(-2.0f);
break;
case OperandField::ConstFloatNeg_4_0:
value = ir.Imm32(-4.0f);
break;
case OperandField::VccLo:
if (force_flt) {
value = ir.BitCast<IR::F32>(ir.GetVccLo());
@ -304,6 +307,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MADAK_F32: // Yes these can share the opcode
translator.V_FMA_F32(inst);
break;
case Opcode::IMAGE_SAMPLE_LZ_O:
case Opcode::IMAGE_SAMPLE_C_LZ:
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
@ -372,6 +376,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NLE_F32:
translator.V_CMP_F32(ConditionOp::GT, false, inst);
break;
case Opcode::V_CMP_NLT_F32:
translator.V_CMP_F32(ConditionOp::GE, false, inst);
break;
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
@ -563,7 +570,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.S_OR_B64(true, inst);
break;
case Opcode::S_AND_B64:
translator.S_AND_B64(inst);
translator.S_AND_B64(false, inst);
break;
case Opcode::S_NAND_B64:
translator.S_AND_B64(true, inst);
break;
case Opcode::V_LSHRREV_B32:
translator.V_LSHRREV_B32(inst);

View file

@ -41,7 +41,7 @@ public:
void S_AND_SAVEEXEC_B64(const GcnInst& inst);
void S_MOV_B64(const GcnInst& inst);
void S_OR_B64(bool negate, const GcnInst& inst);
void S_AND_B64(const GcnInst& inst);
void S_AND_B64(bool negate, const GcnInst& inst);
void S_ADD_I32(const GcnInst& inst);
void S_AND_B32(const GcnInst& inst);
void S_LSHR_B32(const GcnInst& inst);

View file

@ -15,7 +15,8 @@ void Translator::V_SAD(const GcnInst& inst) {
}
void Translator::V_MAC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), GetSrc(inst.dst[0])));
SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true),
GetSrc(inst.dst[0], true)));
}
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
@ -134,13 +135,13 @@ void Translator::V_FLOOR_F32(const GcnInst& inst) {
}
void Translator::V_SUB_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])};
const IR::F32 src1{GetSrc(inst.src[1])};
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPSub(src0, src1));
}
void Translator::V_RCP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])};
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPRecip(src0));
}