(debug) add timings

This commit is contained in:
Xuan Son Nguyen 2026-06-20 20:12:05 +02:00
parent ea65a4b1c8
commit 7486a39756

View File

@ -2625,7 +2625,47 @@ private:
}
}
// @ngxson : for debugging only
int64_t t_pre_decode = 0;
int64_t t_decode = 0;
int64_t t_post_decode = 0;
int64_t n_pre_decode = 0;
int64_t n_decode = 0;
int64_t n_post_decode = 0;
#define DEBUG_TIMINGS
#ifdef DEBUG_TIMINGS
struct scoped_timer {
int64_t & t;
int64_t & n;
int64_t t_start;
scoped_timer(int64_t & t_, int64_t & n_) : t(t_), n(n_) {
t_start = ggml_time_us();
}
~scoped_timer() {
t += ggml_time_us() - t_start;
n++;
}
};
#else
struct scoped_timer {
scoped_timer(int64_t &, int64_t &) {}
~scoped_timer() {}
};
#endif
void update_slots() {
#ifdef DEBUG_TIMINGS
static int64_t t_prev = 0;
int64_t t_start = ggml_time_us();
if (t_start - t_prev > 5 * 1000 * 1000) { // every 5 seconds
t_prev = t_start;
SRV_INF("n_pre_decode = %" PRId64 "\n", n_pre_decode);
SRV_INF("avg t_pre_decode = %f ms\n", (double) t_pre_decode / n_pre_decode / 1000.0);
SRV_INF("avg t_decode = %f ms\n", (double) t_decode / n_decode / 1000.0);
SRV_INF("avg t_post_decode = %f ms\n", (double) t_post_decode / n_post_decode / 1000.0);
}
#endif
// check if all slots are idle
{
bool all_idle = true;
@ -2651,6 +2691,7 @@ private:
}
try {
scoped_timer t(t_pre_decode, n_pre_decode);
pre_decode();
batch.render();
} catch (const std::exception & e) {
@ -2664,6 +2705,7 @@ private:
for (int32_t off = 0; off < batch.size(); off = off_next) {
const int32_t n_tokens = std::min(n_batch, batch.size() - off);
try {
scoped_timer t(t_decode, n_decode);
// TODO @ngxson : maybe handle n_batch == 1 here instead of inside decode()
batch_view = batch.get_view(off, n_tokens);
@ -2686,6 +2728,7 @@ private:
}
try {
scoped_timer t(t_post_decode, n_post_decode);
post_decode(n_tokens, off, batch_view);
} catch (const std::exception & e) {
SRV_ERR("post_decode() failed: %s\n", e.what());