Add profiling infrastructure and widget
This commit is contained in:
parent
c1d29ac202
commit
cd1fbfcf1b
16 changed files with 757 additions and 0 deletions
|
@ -14,6 +14,7 @@ set(SRCS
|
|||
mem_arena.cpp
|
||||
memory_util.cpp
|
||||
misc.cpp
|
||||
profiler.cpp
|
||||
scm_rev.cpp
|
||||
string_util.cpp
|
||||
symbols.cpp
|
||||
|
@ -48,11 +49,14 @@ set(HEADERS
|
|||
mem_arena.h
|
||||
memory_util.h
|
||||
platform.h
|
||||
profiler.h
|
||||
profiler_reporting.h
|
||||
scm_rev.h
|
||||
scope_exit.h
|
||||
string_util.h
|
||||
swap.h
|
||||
symbols.h
|
||||
synchronized_wrapper.h
|
||||
thread.h
|
||||
thread_queue_list.h
|
||||
thunk.h
|
||||
|
|
159
src/common/profiler.cpp
Normal file
159
src/common/profiler.cpp
Normal file
|
@ -0,0 +1,159 @@
|
|||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/profiler.h"
|
||||
#include "common/profiler_reporting.h"
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Common {
|
||||
namespace Profiling {
|
||||
|
||||
#if ENABLE_PROFILING
|
||||
thread_local Timer* Timer::current_timer = nullptr;
|
||||
#endif
|
||||
|
||||
TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
|
||||
: accumulated_duration(0) {
|
||||
|
||||
ProfilingManager& manager = GetProfilingManager();
|
||||
category_id = manager.RegisterTimingCategory(this, name);
|
||||
if (parent != nullptr)
|
||||
manager.SetTimingCategoryParent(category_id, parent->category_id);
|
||||
}
|
||||
|
||||
ProfilingManager::ProfilingManager()
|
||||
: last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
|
||||
}
|
||||
|
||||
unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
|
||||
TimingCategoryInfo info;
|
||||
info.category = category;
|
||||
info.name = name;
|
||||
info.parent = TimingCategoryInfo::NO_PARENT;
|
||||
|
||||
unsigned int id = (unsigned int)timing_categories.size();
|
||||
timing_categories.push_back(std::move(info));
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
|
||||
ASSERT(category < timing_categories.size());
|
||||
ASSERT(parent < timing_categories.size());
|
||||
|
||||
timing_categories[category].parent = parent;
|
||||
}
|
||||
|
||||
void ProfilingManager::BeginFrame() {
|
||||
this_frame_start = Clock::now();
|
||||
}
|
||||
|
||||
void ProfilingManager::FinishFrame() {
|
||||
Clock::time_point now = Clock::now();
|
||||
|
||||
results.interframe_time = now - last_frame_end;
|
||||
results.frame_time = now - this_frame_start;
|
||||
|
||||
results.time_per_category.resize(timing_categories.size());
|
||||
for (size_t i = 0; i < timing_categories.size(); ++i) {
|
||||
results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
|
||||
}
|
||||
|
||||
last_frame_end = now;
|
||||
}
|
||||
|
||||
TimingResultsAggregator::TimingResultsAggregator(size_t window_size)
|
||||
: max_window_size(window_size), window_size(0) {
|
||||
interframe_times.resize(window_size, Duration::zero());
|
||||
frame_times.resize(window_size, Duration::zero());
|
||||
}
|
||||
|
||||
void TimingResultsAggregator::Clear() {
|
||||
window_size = cursor = 0;
|
||||
}
|
||||
|
||||
void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
|
||||
size_t old_size = times_per_category.size();
|
||||
if (n == old_size)
|
||||
return;
|
||||
|
||||
times_per_category.resize(n);
|
||||
|
||||
for (size_t i = old_size; i < n; ++i) {
|
||||
times_per_category[i].resize(max_window_size, Duration::zero());
|
||||
}
|
||||
}
|
||||
|
||||
void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
|
||||
SetNumberOfCategories(frame_result.time_per_category.size());
|
||||
|
||||
interframe_times[cursor] = frame_result.interframe_time;
|
||||
frame_times[cursor] = frame_result.frame_time;
|
||||
for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
|
||||
times_per_category[i][cursor] = frame_result.time_per_category[i];
|
||||
}
|
||||
|
||||
++cursor;
|
||||
if (cursor == max_window_size)
|
||||
cursor = 0;
|
||||
if (window_size < max_window_size)
|
||||
++window_size;
|
||||
}
|
||||
|
||||
static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) {
|
||||
AggregatedDuration result;
|
||||
result.avg = Duration::zero();
|
||||
|
||||
result.min = result.max = (len == 0 ? Duration::zero() : v[0]);
|
||||
|
||||
for (size_t i = 1; i < len; ++i) {
|
||||
Duration value = v[i];
|
||||
result.avg += value;
|
||||
result.min = std::min(result.min, value);
|
||||
result.max = std::max(result.max, value);
|
||||
}
|
||||
if (len != 0)
|
||||
result.avg /= len;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static float tof(Common::Profiling::Duration dur) {
|
||||
using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
return std::chrono::duration_cast<FloatMs>(dur).count();
|
||||
}
|
||||
|
||||
AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
|
||||
AggregatedFrameResult result;
|
||||
|
||||
result.interframe_time = AggregateField(interframe_times, window_size);
|
||||
result.frame_time = AggregateField(frame_times, window_size);
|
||||
|
||||
if (result.interframe_time.avg != Duration::zero()) {
|
||||
result.fps = 1000.0f / tof(result.interframe_time.avg);
|
||||
} else {
|
||||
result.fps = 0.0f;
|
||||
}
|
||||
|
||||
result.time_per_category.resize(times_per_category.size());
|
||||
for (size_t i = 0; i < times_per_category.size(); ++i) {
|
||||
result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
ProfilingManager& GetProfilingManager() {
|
||||
// Takes advantage of "magic" static initialization for race-free initialization.
|
||||
static ProfilingManager manager;
|
||||
return manager;
|
||||
}
|
||||
|
||||
SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() {
|
||||
static SynchronizedWrapper<TimingResultsAggregator> aggregator(30);
|
||||
return SynchronizedRef<TimingResultsAggregator>(aggregator);
|
||||
}
|
||||
|
||||
} // namespace Profiling
|
||||
} // namespace Common
|
134
src/common/profiler.h
Normal file
134
src/common/profiler.h
Normal file
|
@ -0,0 +1,134 @@
|
|||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/thread.h"
|
||||
|
||||
namespace Common {
|
||||
namespace Profiling {
|
||||
|
||||
// If this is defined to 0, it turns all Timers into no-ops.
|
||||
#ifndef ENABLE_PROFILING
|
||||
#define ENABLE_PROFILING 1
|
||||
#endif
|
||||
|
||||
using Duration = std::chrono::nanoseconds;
|
||||
using Clock = std::chrono::high_resolution_clock;
|
||||
|
||||
/**
|
||||
* Represents a timing category that measured time can be accounted towards. Should be declared as a
|
||||
* global variable and passed to Timers.
|
||||
*/
|
||||
class TimingCategory final {
|
||||
public:
|
||||
TimingCategory(const char* name, TimingCategory* parent = nullptr);
|
||||
|
||||
unsigned int GetCategoryId() const {
|
||||
return category_id;
|
||||
}
|
||||
|
||||
/// Adds some time to this category. Can safely be called from multiple threads at the same time.
|
||||
void AddTime(Duration amount) {
|
||||
std::atomic_fetch_add_explicit(
|
||||
&accumulated_duration, amount.count(),
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically retrieves the accumulated measured time for this category and resets the counter
|
||||
* to zero. Can be safely called concurrently with AddTime.
|
||||
*/
|
||||
Duration GetAccumulatedTime() {
|
||||
return Duration(std::atomic_exchange_explicit(
|
||||
&accumulated_duration, (Duration::rep)0,
|
||||
std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned int category_id;
|
||||
std::atomic<Duration::rep> accumulated_duration;
|
||||
};
|
||||
|
||||
/**
|
||||
* Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
|
||||
* TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
|
||||
* appropriately paired.
|
||||
*
|
||||
* When a Timer is started, it automatically pauses a previously running timer on the same thread,
|
||||
* which is resumed when it is stopped. As such, no special action needs to be taken to avoid
|
||||
* double-accounting of time on two categories.
|
||||
*/
|
||||
class Timer {
|
||||
public:
|
||||
Timer(TimingCategory& category) : category(category) {
|
||||
}
|
||||
|
||||
void Start() {
|
||||
#if ENABLE_PROFILING
|
||||
ASSERT(!running);
|
||||
previous_timer = current_timer;
|
||||
current_timer = this;
|
||||
if (previous_timer != nullptr)
|
||||
previous_timer->StopTiming();
|
||||
|
||||
StartTiming();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Stop() {
|
||||
#if ENABLE_PROFILING
|
||||
ASSERT(running);
|
||||
StopTiming();
|
||||
|
||||
if (previous_timer != nullptr)
|
||||
previous_timer->StartTiming();
|
||||
current_timer = previous_timer;
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
#if ENABLE_PROFILING
|
||||
void StartTiming() {
|
||||
start = Clock::now();
|
||||
running = true;
|
||||
}
|
||||
|
||||
void StopTiming() {
|
||||
auto duration = Clock::now() - start;
|
||||
running = false;
|
||||
category.AddTime(std::chrono::duration_cast<Duration>(duration));
|
||||
}
|
||||
|
||||
Clock::time_point start;
|
||||
bool running = false;
|
||||
|
||||
Timer* previous_timer;
|
||||
static thread_local Timer* current_timer;
|
||||
#endif
|
||||
|
||||
TimingCategory& category;
|
||||
};
|
||||
|
||||
/**
|
||||
* A Timer that automatically starts timing when created and stops at the end of the scope. Should
|
||||
* be used in the majority of cases.
|
||||
*/
|
||||
class ScopeTimer : public Timer {
|
||||
public:
|
||||
ScopeTimer(TimingCategory& category) : Timer(category) {
|
||||
Start();
|
||||
}
|
||||
|
||||
~ScopeTimer() {
|
||||
Stop();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Profiling
|
||||
} // namespace Common
|
108
src/common/profiler_reporting.h
Normal file
108
src/common/profiler_reporting.h
Normal file
|
@ -0,0 +1,108 @@
|
|||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/profiler.h"
|
||||
#include "common/synchronized_wrapper.h"
|
||||
|
||||
namespace Common {
|
||||
namespace Profiling {
|
||||
|
||||
struct TimingCategoryInfo {
|
||||
static const unsigned int NO_PARENT = -1;
|
||||
|
||||
TimingCategory* category;
|
||||
const char* name;
|
||||
unsigned int parent;
|
||||
};
|
||||
|
||||
struct ProfilingFrameResult {
|
||||
/// Time since the last delivered frame
|
||||
Duration interframe_time;
|
||||
|
||||
/// Time spent processing a frame, excluding VSync
|
||||
Duration frame_time;
|
||||
|
||||
/// Total amount of time spent inside each category in this frame. Indexed by the category id
|
||||
std::vector<Duration> time_per_category;
|
||||
};
|
||||
|
||||
class ProfilingManager final {
|
||||
public:
|
||||
ProfilingManager();
|
||||
|
||||
unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
|
||||
void SetTimingCategoryParent(unsigned int category, unsigned int parent);
|
||||
|
||||
const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
|
||||
return timing_categories;
|
||||
}
|
||||
|
||||
/// This should be called after swapping screen buffers.
|
||||
void BeginFrame();
|
||||
/// This should be called before swapping screen buffers.
|
||||
void FinishFrame();
|
||||
|
||||
/// Get the timing results from the previous frame. This is updated when you call FinishFrame().
|
||||
const ProfilingFrameResult& GetPreviousFrameResults() const {
|
||||
return results;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<TimingCategoryInfo> timing_categories;
|
||||
Clock::time_point last_frame_end;
|
||||
Clock::time_point this_frame_start;
|
||||
|
||||
ProfilingFrameResult results;
|
||||
};
|
||||
|
||||
struct AggregatedDuration {
|
||||
Duration avg, min, max;
|
||||
};
|
||||
|
||||
struct AggregatedFrameResult {
|
||||
/// Time since the last delivered frame
|
||||
AggregatedDuration interframe_time;
|
||||
|
||||
/// Time spent processing a frame, excluding VSync
|
||||
AggregatedDuration frame_time;
|
||||
|
||||
float fps;
|
||||
|
||||
/// Total amount of time spent inside each category in this frame. Indexed by the category id
|
||||
std::vector<AggregatedDuration> time_per_category;
|
||||
};
|
||||
|
||||
class TimingResultsAggregator final {
|
||||
public:
|
||||
TimingResultsAggregator(size_t window_size);
|
||||
|
||||
void Clear();
|
||||
void SetNumberOfCategories(size_t n);
|
||||
|
||||
void AddFrame(const ProfilingFrameResult& frame_result);
|
||||
|
||||
AggregatedFrameResult GetAggregatedResults() const;
|
||||
|
||||
size_t max_window_size;
|
||||
size_t window_size;
|
||||
size_t cursor;
|
||||
|
||||
std::vector<Duration> interframe_times;
|
||||
std::vector<Duration> frame_times;
|
||||
std::vector<std::vector<Duration>> times_per_category;
|
||||
};
|
||||
|
||||
ProfilingManager& GetProfilingManager();
|
||||
SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator();
|
||||
|
||||
} // namespace Profiling
|
||||
} // namespace Common
|
69
src/common/synchronized_wrapper.h
Normal file
69
src/common/synchronized_wrapper.h
Normal file
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace Common {
|
||||
|
||||
/**
|
||||
* Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
|
||||
* one forgets to lock a mutex before acessing an object. To access the wrapped object construct a
|
||||
* SyncronizedRef on this wrapper. Inspired by Rust's Mutex type (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
|
||||
*/
|
||||
template <typename T>
|
||||
class SynchronizedWrapper {
|
||||
public:
|
||||
template <typename... Args>
|
||||
SynchronizedWrapper(Args&&... args) :
|
||||
data(std::forward<Args>(args)...) {
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename U>
|
||||
friend class SynchronizedRef;
|
||||
|
||||
std::mutex mutex;
|
||||
T data;
|
||||
};
|
||||
|
||||
/**
|
||||
* Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This
|
||||
* greatly reduces the chance that someone will access the wrapped resource without locking the
|
||||
* mutex.
|
||||
*/
|
||||
template <typename T>
|
||||
class SynchronizedRef {
|
||||
public:
|
||||
SynchronizedRef(SynchronizedWrapper<T>& wrapper) : wrapper(&wrapper) {
|
||||
wrapper.mutex.lock();
|
||||
}
|
||||
|
||||
SynchronizedRef(SynchronizedRef&) = delete;
|
||||
SynchronizedRef(SynchronizedRef&& o) : wrapper(o.wrapper) {
|
||||
o.wrapper = nullptr;
|
||||
}
|
||||
|
||||
~SynchronizedRef() {
|
||||
if (wrapper)
|
||||
wrapper->mutex.unlock();
|
||||
}
|
||||
|
||||
SynchronizedRef& operator=(SynchronizedRef&) = delete;
|
||||
SynchronizedRef& operator=(SynchronizedRef&& o) {
|
||||
std::swap(wrapper, o.wrapper);
|
||||
}
|
||||
|
||||
T& operator*() { return wrapper->data; }
|
||||
const T& operator*() const { return wrapper->data; }
|
||||
|
||||
T* operator->() { return &wrapper->data; }
|
||||
const T* operator->() const { return &wrapper->data; }
|
||||
|
||||
private:
|
||||
SynchronizedWrapper<T>* wrapper;
|
||||
};
|
||||
|
||||
} // namespace Common
|
|
@ -24,6 +24,25 @@
|
|||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Support for C++11's thread_local keyword was surprisingly spotty in compilers until very
|
||||
// recently. Fortunately, thread local variables have been well supported for compilers for a while,
|
||||
// but with semantics supporting only POD types, so we can use a few defines to get some amount of
|
||||
// backwards compat support.
|
||||
// WARNING: This only works correctly with POD types.
|
||||
#if defined(__clang__)
|
||||
# if !__has_feature(cxx_thread_local)
|
||||
# define thread_local __thread
|
||||
# endif
|
||||
#elif defined(__GNUC__)
|
||||
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
|
||||
# define thread_local __thread
|
||||
# endif
|
||||
#elif defined(_MSC_VER)
|
||||
# if _MSC_VER < 1900
|
||||
# define thread_local __declspec(thread)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
namespace Common
|
||||
{
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue