mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
spec : discard last drafted token with low prob (#1820)
* spec : discard last drafted token with low prob * Apply suggestion from @ikawrakow Co-authored-by: Kawrakow <iwankawrakow@gmail.com> --------- Co-authored-by: firecoperana <firecoperana> Co-authored-by: Kawrakow <iwankawrakow@gmail.com>
This commit is contained in:
parent
f43a9f1cf6
commit
104846ddee
@ -471,16 +471,20 @@ struct common_speculative_state_draft : public common_speculative_state {
|
||||
|
||||
common_sampler_accept(smpl, nullptr, id, true);
|
||||
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < params.p_min) {
|
||||
if (i == 0) {
|
||||
result.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
result.push_back(id);
|
||||
|
||||
if (params.n_max <= (int) result.size()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < params.p_min) {
|
||||
break;
|
||||
}
|
||||
|
||||
common_batch_add(batch, id, n_past + i + 1, { 0 }, true);
|
||||
|
||||
|
||||
@ -5032,7 +5032,7 @@ void server_context::update_slots() {
|
||||
// start populating the batch for this iteration
|
||||
common_batch_clear(batch);
|
||||
|
||||
// frist, add sampled tokens from any ongoing sequences
|
||||
// first, add sampled tokens from any ongoing sequences
|
||||
add_sampled_tokens(); // Prepare batch for inference
|
||||
|
||||
// process in chunks of params.n_batch
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user