diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 48724245d7..62eb660d7f 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -2625,7 +2625,47 @@ private: } } + // @ngxson : for debugging only + int64_t t_pre_decode = 0; + int64_t t_decode = 0; + int64_t t_post_decode = 0; + int64_t n_pre_decode = 0; + int64_t n_decode = 0; + int64_t n_post_decode = 0; +#define DEBUG_TIMINGS +#ifdef DEBUG_TIMINGS + struct scoped_timer { + int64_t & t; + int64_t & n; + int64_t t_start; + scoped_timer(int64_t & t_, int64_t & n_) : t(t_), n(n_) { + t_start = ggml_time_us(); + } + ~scoped_timer() { + t += ggml_time_us() - t_start; + n++; + } + }; +#else + struct scoped_timer { + scoped_timer(int64_t &, int64_t &) {} + ~scoped_timer() {} + }; +#endif + void update_slots() { +#ifdef DEBUG_TIMINGS + static int64_t t_prev = 0; + int64_t t_start = ggml_time_us(); + if (t_start - t_prev > 5 * 1000 * 1000) { // every 5 seconds + t_prev = t_start; + SRV_INF("n_pre_decode = %" PRId64 "\n", n_pre_decode); + SRV_INF("avg t_pre_decode = %f ms\n", (double) t_pre_decode / n_pre_decode / 1000.0); + SRV_INF("avg t_decode = %f ms\n", (double) t_decode / n_decode / 1000.0); + SRV_INF("avg t_post_decode = %f ms\n", (double) t_post_decode / n_post_decode / 1000.0); + } +#endif + // check if all slots are idle { bool all_idle = true; @@ -2651,6 +2691,7 @@ private: } try { + scoped_timer t(t_pre_decode, n_pre_decode); pre_decode(); batch.render(); } catch (const std::exception & e) { @@ -2664,6 +2705,7 @@ private: for (int32_t off = 0; off < batch.size(); off = off_next) { const int32_t n_tokens = std::min(n_batch, batch.size() - off); try { + scoped_timer t(t_decode, n_decode); // TODO @ngxson : maybe handle n_batch == 1 here instead of inside decode() batch_view = batch.get_view(off, n_tokens); @@ -2686,6 +2728,7 @@ private: } try { + scoped_timer t(t_post_decode, n_post_decode); post_decode(n_tokens, off, batch_view); } catch (const std::exception & e) { SRV_ERR("post_decode() failed: %s\n", e.what());