server : skip checkpoints beyond pos_next (#24411)

* server : skip checkpoints beyond pos_next

* cont : update comment + TODO + ref

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Aldehir Rojas 2026-06-11 02:18:12 -05:00 committed by GitHub
parent ac4cddeb0d
commit db94854ff5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2046,6 +2046,9 @@ private:
auto & cur = slot.prompt.checkpoints.emplace_back();
// [TAG_CHECKPOINTS_FIX_POS_MIN]
// TODO: here we incorrectly deterimne that the saved checkpoint data covers the [pos_min, pos_max] range
// this is not true for SWA models: https://github.com/ggml-org/llama.cpp/pull/24411#issuecomment-4677983225
cur.update_pos(slot.prompt.n_tokens() - n_tokens_cur, pos_min, pos_max);
cur.update_tgt(ctx_tgt, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
@ -2860,6 +2863,10 @@ private:
// guarantee that a checkpoint will result in at least one token being processed [TAG_PROMPT_LOGITS]
LOG_INF("slot %12.*s: id %2d | task %d | Checking checkpoint with [%d, %d] against %d...\n", 12,
func_name, (slot).id, ((slot).task ? (slot).task->id : -1), cur.pos_min, cur.pos_max, pos_min_thold);
// workaround for [TAG_CHECKPOINTS_FIX_POS_MIN]
if (cur.pos_max > pos_next) {
return false;
}
return cur.pos_min < pos_min_thold || cur.pos_min == 0;
}
);