Common: Implement WallClock Interface and implement a native clock for x64

This commit is contained in:
Fernando Sahmkow 2020-02-09 16:53:22 -04:00
parent 0f8e5a1465
commit 234b5ff6a9
10 changed files with 378 additions and 40 deletions

View file

@ -167,6 +167,8 @@ add_library(common STATIC
vector_math.h
virtual_buffer.cpp
virtual_buffer.h
wall_clock.cpp
wall_clock.h
web_result.h
zstd_compression.cpp
zstd_compression.h
@ -177,6 +179,8 @@ if(ARCHITECTURE_x86_64)
PRIVATE
x64/cpu_detect.cpp
x64/cpu_detect.h
x64/native_clock.cpp
x64/native_clock.h
x64/xbyak_abi.h
x64/xbyak_util.h
)

90
src/common/wall_clock.cpp Normal file
View file

@ -0,0 +1,90 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/uint128.h"
#include "common/wall_clock.h"
#ifdef ARCHITECTURE_x86_64
#include "common/x64/cpu_detect.h"
#include "common/x64/native_clock.h"
#endif
namespace Common {
// Host clock used by StandardWallClock; steady_clock is monotonic, so elapsed
// intervals computed from it never go backwards.
using base_timer = std::chrono::steady_clock;
// A point in time as reported by base_timer.
using base_time_point = std::chrono::time_point<base_timer>;
class StandardWallClock : public WallClock {
public:
StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
: WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) {
start_time = base_timer::now();
}
std::chrono::nanoseconds GetTimeNS() override {
base_time_point current = base_timer::now();
auto elapsed = current - start_time;
return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
}
std::chrono::microseconds GetTimeUS() override {
base_time_point current = base_timer::now();
auto elapsed = current - start_time;
return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
}
std::chrono::milliseconds GetTimeMS() override {
base_time_point current = base_timer::now();
auto elapsed = current - start_time;
return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
}
u64 GetClockCycles() override {
std::chrono::nanoseconds time_now = GetTimeNS();
const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
return Common::Divide128On32(temporal, 1000000000).first;
}
u64 GetCPUCycles() override {
std::chrono::nanoseconds time_now = GetTimeNS();
const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
return Common::Divide128On32(temporal, 1000000000).first;
}
private:
base_time_point start_time;
};
#ifdef ARCHITECTURE_x86_64
/// Creates the wall clock best suited to the host CPU (x86-64 build).
///
/// When the CPU reports an invariant TSC, a X64::NativeClock is created. Its TSC
/// frequency is taken from caps.base_frequency scaled by 1,000,000 (presumably MHz
/// to Hz - confirm against CPUID leaf 0x16) or, when that value is zero, measured
/// with EstimateRDTSCFrequency(). Otherwise a StandardWallClock is created.
///
/// The returned object is allocated with `new`.
WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
    const auto& caps = GetCPUCaps();

    // Zero means "no usable TSC frequency": either the TSC is not invariant or no
    // frequency could be determined.
    u64 rtsc_frequency = 0;
    if (caps.invariant_tsc) {
        rtsc_frequency = caps.base_frequency != 0
                             ? static_cast<u64>(caps.base_frequency) * 1000000U
                             : EstimateRDTSCFrequency();
    }

    if (rtsc_frequency == 0) {
        return new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency);
    }
    return new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency);
}
#else
/// Creates the wall clock for hosts without a native clock implementation: always a
/// StandardWallClock, allocated with `new`.
WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
    WallClock* const clock =
        new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency);
    return clock;
}
#endif
} // namespace Common

40
src/common/wall_clock.h Normal file
View file

@ -0,0 +1,40 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <chrono>
#include "common/common_types.h"
namespace Common {
/// Abstract time source used for emulation timing.
///
/// Implementations report the current time in several units and as tick counts of
/// the emulated clock and CPU frequencies supplied at construction.
class WallClock {
public:
    /// Virtual so that a clock obtained as a WallClock* (e.g. from
    /// CreateBestMatchingClock) can be safely destroyed through that pointer.
    virtual ~WallClock() = default;

    /// Returns the current time in nanoseconds.
    virtual std::chrono::nanoseconds GetTimeNS() = 0;

    /// Returns the current time in microseconds.
    virtual std::chrono::microseconds GetTimeUS() = 0;

    /// Returns the current time in milliseconds.
    virtual std::chrono::milliseconds GetTimeMS() = 0;

    /// Returns the current time as a tick count of the emulated clock frequency.
    virtual u64 GetClockCycles() = 0;

    /// Returns the current time as a tick count of the emulated CPU frequency.
    virtual u64 GetCPUCycles() = 0;

    /// Tells whether the wall clock uses the host CPU's hardware clock.
    bool IsNative() const {
        return is_native;
    }

protected:
    /// @param emulated_cpu_frequency   Frequency used by GetCPUCycles().
    /// @param emulated_clock_frequency Frequency used by GetClockCycles().
    /// @param is_native                Value reported by IsNative().
    WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
        : emulated_cpu_frequency{emulated_cpu_frequency},
          emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}

    u64 emulated_cpu_frequency;
    u64 emulated_clock_frequency;

private:
    bool is_native;
};
/// Creates the wall clock implementation best suited to the host.
/// The returned object is allocated with `new`; presumably the caller takes
/// ownership of it (confirm at call sites).
WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
} // namespace Common

View file

@ -62,6 +62,17 @@ static CPUCaps Detect() {
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
caps.manufacturer = Manufacturer::Intel;
else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
caps.manufacturer = Manufacturer::AMD;
else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
caps.manufacturer = Manufacturer::Hygon;
else
caps.manufacturer = Manufacturer::Unknown;
u32 family = {};
u32 model = {};
__cpuid(cpu_id, 0x80000000);
@ -73,6 +84,14 @@ static CPUCaps Detect() {
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
family = (cpu_id[0] >> 8) & 0xf;
model = (cpu_id[0] >> 4) & 0xf;
if (family == 0xf) {
family += (cpu_id[0] >> 20) & 0xff;
}
if (family >= 6) {
model += ((cpu_id[0] >> 16) & 0xf) << 4;
}
if ((cpu_id[3] >> 25) & 1)
caps.sse = true;
@ -130,6 +149,20 @@ static CPUCaps Detect() {
caps.fma4 = true;
}
if (max_ex_fn >= 0x80000007) {
__cpuid(cpu_id, 0x80000007);
if (cpu_id[3] & (1 << 8)) {
caps.invariant_tsc = true;
}
}
if (max_std_fn >= 0x16) {
__cpuid(cpu_id, 0x16);
caps.base_frequency = cpu_id[0];
caps.max_frequency = cpu_id[1];
caps.bus_frequency = cpu_id[2];
}
return caps;
}

View file

@ -6,8 +6,16 @@
namespace Common {
/// CPU vendor, as recognised from the CPUID vendor identification registers.
/// Enumerators are numbered consecutively starting at zero.
enum class Manufacturer : u32 {
    Intel,   // 0
    AMD,     // 1
    Hygon,   // 2
    Unknown, // 3 - vendor string matched none of the above
};
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
Manufacturer manufacturer;
char cpu_string[0x21];
char brand_string[0x41];
bool sse;
@ -24,6 +32,10 @@ struct CPUCaps {
bool fma;
bool fma4;
bool aes;
bool invariant_tsc;
u32 base_frequency;
u32 max_frequency;
u32 bus_frequency;
};
/**

View file

@ -0,0 +1,128 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <chrono>
#include <mutex>
#include <thread>
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include "common/x64/native_clock.h"
namespace Common {
#ifdef _MSC_VER
namespace {

/// Computes (a * b) / d using MSVC's 128-bit multiply and divide intrinsics, so the
/// intermediate product is not truncated to 64 bits.
/// The remainder of the division is discarded.
u64 umuldiv64(u64 a, u64 b, u64 d) {
    u64 product_high = 0;
    const u64 product_low = _umul128(a, b, &product_high);

    u64 discarded_remainder = 0;
    return _udiv128(product_high, product_low, d, &discarded_remainder);
}

} // namespace
#else
namespace {

// Compiler-provided 128-bit unsigned integer (GCC/Clang extension on 64-bit targets).
// __extension__ keeps pedantic builds quiet about the non-ISO type.
__extension__ typedef unsigned __int128 NativeUint128;

/// Computes floor((a * b) / d) with a full 128-bit intermediate product.
///
/// The previous decomposition into quotients and remainders multiplied two values
/// that can each be as large as d - 1, which silently overflowed 64 bits once d
/// exceeded 2^32 and produced wrong results. Widening the product avoids that.
///
/// @param a First factor.
/// @param b Second factor.
/// @param d Divisor; must be non-zero.
/// @return The quotient, truncated to 64 bits if it does not fit.
u64 umuldiv64(u64 a, u64 b, u64 d) {
    const NativeUint128 product = static_cast<NativeUint128>(a) * b;
    return static_cast<u64>(product / d);
}

} // namespace
#endif
/// Estimates the host TSC frequency in Hz by counting TSC ticks over a measured
/// interval of roughly three seconds. Blocks the calling thread for that long.
///
/// The interval is timed with std::chrono::steady_clock: it is monotonic, so the
/// measurement cannot be skewed by adjustments to the system time.
u64 EstimateRDTSCFrequency() {
    using Clock = std::chrono::steady_clock;
    constexpr auto measurement_window = std::chrono::milliseconds{3000};
    constexpr auto poll_interval = std::chrono::milliseconds{10};

    // Sample the TSC and the reference clock back to back at the start...
    _mm_mfence();
    const u64 tsc_start = __rdtsc();
    const auto start_time = Clock::now();

    // ...sleep in short slices until the measurement window has elapsed...
    while (Clock::now() - start_time < measurement_window) {
        std::this_thread::sleep_for(poll_interval);
    }

    // ...and sample both again at the end.
    const auto end_time = Clock::now();
    _mm_mfence();
    const u64 tsc_end = __rdtsc();

    const u64 elapsed_ns = static_cast<u64>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
    const u64 tsc_diff = tsc_end - tsc_start;

    // ticks / seconds == ticks * 1e9 / nanoseconds
    return umuldiv64(tsc_diff, 1000000000ULL, elapsed_ns);
}
namespace X64 {
/// Builds a native clock whose tick accumulator starts at zero and whose reference
/// TSC sample is taken at construction time.
NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
                         u64 rtsc_frequency)
    : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true),
      rtsc_frequency{rtsc_frequency} {
    accumulated_ticks = 0U;
    // Take the initial TSC sample last so it is as close as possible to first use.
    _mm_mfence();
    last_measure = __rdtsc();
}
/// Samples the TSC and returns the total number of ticks accumulated since the
/// clock was constructed.
///
/// The whole update, including the read of the value that is returned, happens
/// while rtsc_serialize is held. Previously accumulated_ticks was read after the
/// lock had been released, racing with concurrent callers.
u64 NativeClock::GetRTSC() {
    const std::scoped_lock lock{rtsc_serialize};

    _mm_mfence();
    const u64 current_measure = __rdtsc();

    // Ticks since the previous sample. If the difference is negative when viewed as
    // a signed value (the TSC appears to have gone backwards), the arithmetic shift
    // yields an all-ones mask whose complement clamps the difference to zero.
    u64 diff = current_measure - last_measure;
    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)

    // Only ever move the reference sample forward.
    if (current_measure > last_measure) {
        last_measure = current_measure;
    }

    accumulated_ticks += diff;
    return accumulated_ticks;
}
std::chrono::nanoseconds NativeClock::GetTimeNS() {
const u64 rtsc_value = GetRTSC();
return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)};
}
std::chrono::microseconds NativeClock::GetTimeUS() {
const u64 rtsc_value = GetRTSC();
return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)};
}
std::chrono::milliseconds NativeClock::GetTimeMS() {
const u64 rtsc_value = GetRTSC();
return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)};
}
/// Rescales the accumulated TSC ticks to ticks of the emulated clock frequency.
u64 NativeClock::GetClockCycles() {
    const u64 host_ticks = GetRTSC();
    const u64 emulated_ticks = umuldiv64(host_ticks, emulated_clock_frequency, rtsc_frequency);
    return emulated_ticks;
}
/// Rescales the accumulated TSC ticks to ticks of the emulated CPU frequency.
u64 NativeClock::GetCPUCycles() {
    const u64 host_ticks = GetRTSC();
    const u64 emulated_ticks = umuldiv64(host_ticks, emulated_cpu_frequency, rtsc_frequency);
    return emulated_ticks;
}
} // namespace X64
} // namespace Common

View file

@ -0,0 +1,41 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include "common/spin_lock.h"
#include "common/wall_clock.h"
namespace Common {
namespace X64 {
/// WallClock implementation driven by the host CPU's time stamp counter.
class NativeClock : public WallClock {
public:
    /// @param emulated_cpu_frequency   Frequency used by GetCPUCycles().
    /// @param emulated_clock_frequency Frequency used by GetClockCycles().
    /// @param rtsc_frequency           Frequency of the host TSC, used as the divisor
    ///                                 when converting ticks to time or emulated ticks.
    NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);

    std::chrono::nanoseconds GetTimeNS() override;

    std::chrono::microseconds GetTimeUS() override;

    std::chrono::milliseconds GetTimeMS() override;

    u64 GetClockCycles() override;

    u64 GetCPUCycles() override;

private:
    /// Samples the TSC and returns the ticks accumulated since construction.
    u64 GetRTSC();

    /// Serialises updates of last_measure and accumulated_ticks.
    SpinLock rtsc_serialize{};
    /// Most recent TSC sample taken into account.
    u64 last_measure = 0;
    /// Sum of the tick differences between successive TSC samples.
    u64 accumulated_ticks = 0;
    /// Host TSC frequency supplied at construction.
    u64 rtsc_frequency;
};
} // namespace X64
/// Estimates the host TSC frequency (ticks per second) by comparing RDTSC readings
/// against a std::chrono clock over an interval of roughly three seconds.
/// Blocks the calling thread for the duration of the measurement.
u64 EstimateRDTSCFrequency();
} // namespace Common