mirror of
https://git.huckle.dev/Huckles-Minecraft-Archive/4jcraft.git
synced 2026-06-04 23:26:54 +00:00
feat(profiling): add low-overhead in-engine frame profiler
This commit is contained in:
260
Minecraft.Client/Utils/FrameProfiler.cpp
Normal file
260
Minecraft.Client/Utils/FrameProfiler.cpp
Normal file
@@ -0,0 +1,260 @@
|
||||
#include "../Platform/stdafx.h"
|
||||
#include "FrameProfiler.h"
|
||||
|
||||
#ifdef ENABLE_FRAME_PROFILER
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <string_view>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define FRAME_PROFILER_NOINLINE __declspec(noinline)
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define FRAME_PROFILER_NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
#define FRAME_PROFILER_NOINLINE
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
using FrameProfilerClock = std::chrono::steady_clock;
|
||||
using Bucket = FrameProfiler::Bucket;
|
||||
constexpr std::uint64_t kNsPerMs = 1000ULL * 1000ULL;
|
||||
constexpr std::uint64_t kReportIntervalNs = 1000ULL * 1000ULL * 1000ULL;
|
||||
constexpr std::size_t kBucketCount = FrameProfiler::BucketCount();
|
||||
constexpr auto kFalseTokens = std::to_array<std::string_view>({
|
||||
"0", "false", "False", "FALSE", "no", "No", "NO", "off", "Off", "OFF",
|
||||
});
|
||||
constexpr std::array<FrameProfiler::BucketDescriptor, kBucketCount>
|
||||
kBucketDescriptors = {{
|
||||
{Bucket::Frame, "frame"},
|
||||
{Bucket::World, "world"},
|
||||
{Bucket::Terrain, "terrain"},
|
||||
{Bucket::ChunkCull, "chunkCull"},
|
||||
{Bucket::ChunkCollect, "chunkCollect"},
|
||||
{Bucket::ChunkPlayback, "chunkPlayback"},
|
||||
{Bucket::ChunkDirtyScan, "chunkDirtyScan"},
|
||||
{Bucket::ChunkRebuildSchedule, "chunkRebuildSchedule"},
|
||||
{Bucket::ChunkRebuildBody, "chunkRebuildBody"},
|
||||
{Bucket::Entity, "entities"},
|
||||
{Bucket::Particle, "particles"},
|
||||
{Bucket::WeatherSky, "weather"},
|
||||
{Bucket::UIHud, "ui"},
|
||||
{Bucket::Lightmap, "lightmap"},
|
||||
}};
|
||||
|
||||
struct BucketTotals {
|
||||
std::uint64_t totalNs{};
|
||||
std::uint64_t maxNs{};
|
||||
std::uint64_t calls{};
|
||||
|
||||
void Record(std::uint64_t elapsedNs) noexcept {
|
||||
totalNs += elapsedNs;
|
||||
++calls;
|
||||
if (elapsedNs > maxNs) maxNs = elapsedNs;
|
||||
}
|
||||
|
||||
void Merge(const BucketTotals& other) noexcept {
|
||||
totalNs += other.totalNs;
|
||||
calls += other.calls;
|
||||
if (other.maxNs > maxNs) maxNs = other.maxNs;
|
||||
}
|
||||
};
|
||||
|
||||
struct AtomicBucketTotals {
|
||||
std::atomic<std::uint64_t> totalNs{0};
|
||||
std::atomic<std::uint64_t> maxNs{0};
|
||||
std::atomic<std::uint64_t> calls{0};
|
||||
};
|
||||
|
||||
struct ProfilerState {
|
||||
std::array<AtomicBucketTotals, kBucketCount> workerBuckets{};
|
||||
};
|
||||
|
||||
struct ThreadState {
|
||||
std::uint32_t frameScopeDepth{};
|
||||
std::uint64_t windowStartNs{};
|
||||
std::array<BucketTotals, kBucketCount> localBuckets{};
|
||||
};
|
||||
|
||||
constinit ProfilerState g_profilerState{};
|
||||
constinit thread_local ThreadState t_threadState{};
|
||||
|
||||
static_assert(kBucketDescriptors.size() == kBucketCount);
|
||||
|
||||
[[nodiscard]] inline std::uint64_t nowNs() noexcept {
|
||||
return static_cast<std::uint64_t>(
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
FrameProfilerClock::now().time_since_epoch())
|
||||
.count());
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr double nsToMs(std::uint64_t ns) noexcept {
|
||||
return static_cast<double>(ns) / static_cast<double>(kNsPerMs);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr bool envSaysDisabled(
|
||||
std::string_view value) noexcept {
|
||||
if (value.empty()) return false;
|
||||
|
||||
for (std::string_view falseToken : kFalseTokens) {
|
||||
if (value == falseToken) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void updateAtomicMax(std::atomic<std::uint64_t>& value,
|
||||
std::uint64_t candidate) noexcept {
|
||||
std::uint64_t current = value.load(std::memory_order_relaxed);
|
||||
while (current < candidate &&
|
||||
!value.compare_exchange_weak(current, candidate,
|
||||
std::memory_order_relaxed,
|
||||
std::memory_order_relaxed)) {
|
||||
}
|
||||
}
|
||||
|
||||
inline void recordWorkerBucket(Bucket bucket, std::uint64_t elapsedNs) noexcept {
|
||||
AtomicBucketTotals& state =
|
||||
g_profilerState.workerBuckets[FrameProfiler::BucketIndex(bucket)];
|
||||
state.totalNs.fetch_add(elapsedNs, std::memory_order_relaxed);
|
||||
state.calls.fetch_add(1, std::memory_order_relaxed);
|
||||
updateAtomicMax(state.maxNs, elapsedNs);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool isFrameThread() noexcept {
|
||||
return t_threadState.frameScopeDepth != 0;
|
||||
}
|
||||
|
||||
FRAME_PROFILER_NOINLINE
|
||||
[[nodiscard]] bool computeEnabled() noexcept {
|
||||
const char* const envValue = std::getenv("C4J_FRAME_PROFILER");
|
||||
if (envValue == nullptr) return true;
|
||||
return !envSaysDisabled(envValue);
|
||||
}
|
||||
|
||||
FRAME_PROFILER_NOINLINE void emitWindowReport(
|
||||
const std::array<BucketTotals, kBucketCount>& buckets) noexcept {
|
||||
const std::uint64_t frames =
|
||||
buckets[FrameProfiler::BucketIndex(Bucket::Frame)].calls;
|
||||
if (frames == 0) return;
|
||||
|
||||
std::fprintf(stderr,
|
||||
"[frame-prof] avg/frame(ms) frames=%llu",
|
||||
static_cast<unsigned long long>(frames));
|
||||
for (const auto& descriptor : kBucketDescriptors) {
|
||||
const BucketTotals& bucket =
|
||||
buckets[FrameProfiler::BucketIndex(descriptor.bucket)];
|
||||
const std::string_view label = descriptor.label;
|
||||
std::fprintf(stderr, " %.*s=%.2f", static_cast<int>(label.size()),
|
||||
label.data(), nsToMs(bucket.totalNs) / frames);
|
||||
}
|
||||
std::fputc('\n', stderr);
|
||||
|
||||
std::fputs("[frame-prof] max(ms)/calls", stderr);
|
||||
for (const auto& descriptor : kBucketDescriptors) {
|
||||
const BucketTotals& bucket =
|
||||
buckets[FrameProfiler::BucketIndex(descriptor.bucket)];
|
||||
const std::string_view label = descriptor.label;
|
||||
std::fprintf(stderr, " %.*s=%.2f/%llu",
|
||||
static_cast<int>(label.size()), label.data(),
|
||||
nsToMs(bucket.maxNs),
|
||||
static_cast<unsigned long long>(bucket.calls));
|
||||
}
|
||||
std::fputc('\n', stderr);
|
||||
std::fflush(stderr);
|
||||
}
|
||||
|
||||
[[nodiscard]] std::array<BucketTotals, kBucketCount> snapshotAndResetWorkerBuckets()
|
||||
noexcept {
|
||||
std::array<BucketTotals, kBucketCount> snapshot = {};
|
||||
for (std::size_t i = 0; i < kBucketCount; ++i) {
|
||||
AtomicBucketTotals& workerBucket = g_profilerState.workerBuckets[i];
|
||||
snapshot[i].totalNs =
|
||||
workerBucket.totalNs.exchange(0, std::memory_order_relaxed);
|
||||
snapshot[i].maxNs =
|
||||
workerBucket.maxNs.exchange(0, std::memory_order_relaxed);
|
||||
snapshot[i].calls =
|
||||
workerBucket.calls.exchange(0, std::memory_order_relaxed);
|
||||
}
|
||||
return snapshot;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool FrameProfiler::IsEnabled() noexcept {
|
||||
static const bool enabled = computeEnabled();
|
||||
return enabled;
|
||||
}
|
||||
|
||||
void FrameProfiler::Record(Bucket bucket, std::uint64_t elapsedNs) noexcept {
|
||||
if (isFrameThread()) {
|
||||
t_threadState.localBuckets[BucketIndex(bucket)].Record(elapsedNs);
|
||||
return;
|
||||
}
|
||||
|
||||
recordWorkerBucket(bucket, elapsedNs);
|
||||
}
|
||||
|
||||
void FrameProfiler::EndFrame(std::uint64_t elapsedNs) noexcept {
|
||||
Record(Bucket::Frame, elapsedNs);
|
||||
|
||||
ThreadState& threadState = t_threadState;
|
||||
const std::uint64_t now = nowNs();
|
||||
|
||||
if (threadState.windowStartNs == 0) {
|
||||
threadState.windowStartNs = now;
|
||||
return;
|
||||
}
|
||||
|
||||
if ((now - threadState.windowStartNs) < kReportIntervalNs) return;
|
||||
|
||||
std::array<BucketTotals, kBucketCount> combined = threadState.localBuckets;
|
||||
const auto workerSnapshot = snapshotAndResetWorkerBuckets();
|
||||
|
||||
for (std::size_t i = 0; i < kBucketCount; ++i) {
|
||||
combined[i].Merge(workerSnapshot[i]);
|
||||
}
|
||||
|
||||
emitWindowReport(combined);
|
||||
|
||||
threadState.windowStartNs = now;
|
||||
threadState.localBuckets = {};
|
||||
}
|
||||
|
||||
FrameProfiler::Scope::Scope(Bucket bucket) noexcept
|
||||
: m_startNs(0), m_bucket(bucket), m_enabled(FrameProfiler::IsEnabled()) {
|
||||
if (m_enabled) m_startNs = nowNs();
|
||||
}
|
||||
|
||||
FrameProfiler::Scope::~Scope() noexcept {
|
||||
if (!m_enabled) return;
|
||||
FrameProfiler::Record(m_bucket, nowNs() - m_startNs);
|
||||
}
|
||||
|
||||
FrameProfiler::FrameScope::FrameScope() noexcept
|
||||
: m_startNs(0), m_enabled(false) {
|
||||
if (!FrameProfiler::IsEnabled()) return;
|
||||
|
||||
m_enabled = (t_threadState.frameScopeDepth++ == 0);
|
||||
if (m_enabled) m_startNs = nowNs();
|
||||
}
|
||||
|
||||
FrameProfiler::FrameScope::~FrameScope() noexcept {
|
||||
if (!m_enabled) {
|
||||
if (t_threadState.frameScopeDepth > 0) {
|
||||
--t_threadState.frameScopeDepth;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
FrameProfiler::EndFrame(nowNs() - m_startNs);
|
||||
|
||||
if (t_threadState.frameScopeDepth > 0) {
|
||||
--t_threadState.frameScopeDepth;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user