mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
(debug) add timings
This commit is contained in:
parent
ea65a4b1c8
commit
7486a39756
@ -2625,7 +2625,47 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// @ngxson : for debugging only
|
||||
int64_t t_pre_decode = 0;
|
||||
int64_t t_decode = 0;
|
||||
int64_t t_post_decode = 0;
|
||||
int64_t n_pre_decode = 0;
|
||||
int64_t n_decode = 0;
|
||||
int64_t n_post_decode = 0;
|
||||
#define DEBUG_TIMINGS
|
||||
#ifdef DEBUG_TIMINGS
|
||||
struct scoped_timer {
|
||||
int64_t & t;
|
||||
int64_t & n;
|
||||
int64_t t_start;
|
||||
scoped_timer(int64_t & t_, int64_t & n_) : t(t_), n(n_) {
|
||||
t_start = ggml_time_us();
|
||||
}
|
||||
~scoped_timer() {
|
||||
t += ggml_time_us() - t_start;
|
||||
n++;
|
||||
}
|
||||
};
|
||||
#else
|
||||
struct scoped_timer {
|
||||
scoped_timer(int64_t &, int64_t &) {}
|
||||
~scoped_timer() {}
|
||||
};
|
||||
#endif
|
||||
|
||||
void update_slots() {
|
||||
#ifdef DEBUG_TIMINGS
|
||||
static int64_t t_prev = 0;
|
||||
int64_t t_start = ggml_time_us();
|
||||
if (t_start - t_prev > 5 * 1000 * 1000) { // every 5 seconds
|
||||
t_prev = t_start;
|
||||
SRV_INF("n_pre_decode = %" PRId64 "\n", n_pre_decode);
|
||||
SRV_INF("avg t_pre_decode = %f ms\n", (double) t_pre_decode / n_pre_decode / 1000.0);
|
||||
SRV_INF("avg t_decode = %f ms\n", (double) t_decode / n_decode / 1000.0);
|
||||
SRV_INF("avg t_post_decode = %f ms\n", (double) t_post_decode / n_post_decode / 1000.0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// check if all slots are idle
|
||||
{
|
||||
bool all_idle = true;
|
||||
@ -2651,6 +2691,7 @@ private:
|
||||
}
|
||||
|
||||
try {
|
||||
scoped_timer t(t_pre_decode, n_pre_decode);
|
||||
pre_decode();
|
||||
batch.render();
|
||||
} catch (const std::exception & e) {
|
||||
@ -2664,6 +2705,7 @@ private:
|
||||
for (int32_t off = 0; off < batch.size(); off = off_next) {
|
||||
const int32_t n_tokens = std::min(n_batch, batch.size() - off);
|
||||
try {
|
||||
scoped_timer t(t_decode, n_decode);
|
||||
// TODO @ngxson : maybe handle n_batch == 1 here instead of inside decode()
|
||||
|
||||
batch_view = batch.get_view(off, n_tokens);
|
||||
@ -2686,6 +2728,7 @@ private:
|
||||
}
|
||||
|
||||
try {
|
||||
scoped_timer t(t_post_decode, n_post_decode);
|
||||
post_decode(n_tokens, off, batch_view);
|
||||
} catch (const std::exception & e) {
|
||||
SRV_ERR("post_decode() failed: %s\n", e.what());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user