mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
server : skip checkpoints beyond pos_next (#24411)
* server : skip checkpoints beyond pos_next * cont : update comment + TODO + ref --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
ac4cddeb0d
commit
db94854ff5
@ -2046,6 +2046,9 @@ private:
|
||||
|
||||
auto & cur = slot.prompt.checkpoints.emplace_back();
|
||||
|
||||
// [TAG_CHECKPOINTS_FIX_POS_MIN]
|
||||
// TODO: here we incorrectly deterimne that the saved checkpoint data covers the [pos_min, pos_max] range
|
||||
// this is not true for SWA models: https://github.com/ggml-org/llama.cpp/pull/24411#issuecomment-4677983225
|
||||
cur.update_pos(slot.prompt.n_tokens() - n_tokens_cur, pos_min, pos_max);
|
||||
|
||||
cur.update_tgt(ctx_tgt, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
@ -2860,6 +2863,10 @@ private:
|
||||
// guarantee that a checkpoint will result in at least one token being processed [TAG_PROMPT_LOGITS]
|
||||
LOG_INF("slot %12.*s: id %2d | task %d | Checking checkpoint with [%d, %d] against %d...\n", 12,
|
||||
func_name, (slot).id, ((slot).task ? (slot).task->id : -1), cur.pos_min, cur.pos_max, pos_min_thold);
|
||||
// workaround for [TAG_CHECKPOINTS_FIX_POS_MIN]
|
||||
if (cur.pos_max > pos_next) {
|
||||
return false;
|
||||
}
|
||||
return cur.pos_min < pos_min_thold || cur.pos_min == 0;
|
||||
}
|
||||
);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user