server: fix non-bound n_discard value (ctx shifting) (#24786)

* server: fix non-bound n_discard value

* Update tools/server/server-context.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2026-06-27 23:50:20 -05:00 · 2026-06-19 10:53:44 +02:00 · 2026-06-19 10:53:44 +02:00 · 159d093a43
commit 159d093a43
parent 5fd2dc2c41
1 changed files with 4 additions and 1 deletions
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2565,7 +2565,10 @@ private:
                n_keep = std::min(slot.n_ctx - 4, n_keep);

                const int n_left    = slot.prompt.n_tokens() - n_keep;
-                const int n_discard = slot.task->params.n_discard ? slot.task->params.n_discard : (n_left / 2);
+                int       n_discard = slot.task->params.n_discard ? slot.task->params.n_discard : (n_left / 2);
+
+                // ref: https://github.com/ggml-org/llama.cpp/pull/24786
+                n_discard = std::clamp(n_discard, 0, std::max(0, n_left - 1));

                SLT_WRN(slot, "slot context shift, n_keep = %d, n_left = %d, n_discard = %d\n", n_keep, n_left, n_discard);